###################################################### REPLICATION CODE FOR "VALIDATING WORDSCORES" BRUINSMA-GEMENIS ######################################################

# This code expects a main folder "Replication Files" containing a folder "Manifestos", in which each country has its own folder with the .pdf manifestos
# The file "benchmarks.csv" is in the main folder
# Scans using Tesseract OCR will appear in the main "Manifestos" folder - these can safely be deleted after the analysis

# Note that the following changes have been made and manifestos have been reassigned. These changes also apply to the benchmarks.
# The names on the left are the names of the parties in the manifestos, the names on the right the ones that are used here and in the article.
# This is to account for parties changing coalition or changing name.

# UDF 2009 = MODEM-UDI 2009
# PCF 2009 = FG2009
# ALTRA 2009 = PRC 2009
# PD 04 = ULIVO 2004
# PDL 2009 = FI2009
# LPPLC 04 = LC 04
# PSD04 = PSDPP 04
# LKS 2009 = PCTVL 2009

# Load required packages into R

# R version 3.5.1 (2018-07-02) -- "Feather Spray" was used for the article

library(readr)
library(quanteda)
# Note that Quanteda is still being updated regularly.
# Here, version 1.3.4 was used. The results are expected to remain the same in future versions, though the commands might change slightly.

library(readtext)
library(pdftools)
library(tesseract)
# Note that the tesseract package requires a fair number of other packages to install.
# Please see https://github.com/tesseract-ocr/tesseract/wiki#windows for how to install Tesseract. Afterwards, you can install the package.

library(rlist)
library(stopwords)
library(scales)
library(epiR)
library(ggplot2)
library(ggpubr)
library(stringi)
library(mlogit)
library(mnlogit)
library(berryFunctions)

# Set seed

# Fix the random number generator state, then work from the replication root.
set.seed(seed = 42)
setwd(dir = "~/Downloads/Replication Files")

################################################################################## Calculate Wordscores ################################################################################

# Here, the wordscores are calculated for each country and later on collected into a single file.
# During the analysis, a new folder "Wordscores" is created
# Quanteda is used to calculate the scores

# Three messages appear repeatedly during the analysis. All of them can be safely ignored.

#1: X features in newdata not used in prediction.
# This warning occurs because Wordscores uses only the words from the virgin texts that are also in the reference texts

#2: In predict.textmodel_wordscores(ws_x_lr_bl, rescaling = "mv") :
#  More than two reference scores found with MV rescaling; using only min, max values.
#  This warning occurs because the MV rescaling only requires two values

#3: PDF error: Invalid Font Weight
# This message is related to the reading of the pdf files and can be ignored

# Root of the replication material and the sub-folder that collects the
# per-country Wordscores output.
mainDir <- "~/Downloads/Replication Files"
subDir <- "Wordscores"

# Create the output folder only when it is missing, so that re-running the
# script does not raise an "already exists" warning from dir.create().
if (!dir.exists(file.path(mainDir, subDir))) {
  dir.create(file.path(mainDir, subDir))
}

# All manifesto paths below are relative to the "Manifestos" folder.
setwd("~/Downloads/Replication Files/Manifestos")

################################################################################## Austria ################################################################################

# FPO_04 goes through OCR: pdf_convert() renders the PDF at 1200 dpi and the
# result is read with the German ("deu") Tesseract engine.
FPO_04 <- ocr(
  image = pdf_convert(pdf = "Austria/FPO_04.pdf", dpi = 1200),
  engine = "deu"
)

# The remaining Austrian manifestos are read directly with pdf_text().
GRUNEN_04 <- pdf_text(pdf = "Austria/GRUNEN_04.pdf")
OVP_04 <- pdf_text(pdf = "Austria/OVP_04.pdf")
SPO_04 <- pdf_text(pdf = "Austria/SPO_04.pdf")
FPO_09 <- pdf_text(pdf = "Austria/FPO_09.pdf")
GRUNEN_09 <- pdf_text(pdf = "Austria/GRUNEN_09.pdf")
OVP_09 <- pdf_text(pdf = "Austria/OVP_09.pdf")
SPO_09 <- pdf_text(pdf = "Austria/SPO_09.pdf")

# Store every text as a .txt file inside the country folder.
write(x = FPO_04, file = "Austria/FPO_04.txt")
write(x = GRUNEN_04, file = "Austria/GRUNEN_04.txt")
write(x = OVP_04, file = "Austria/OVP_04.txt")
write(x = SPO_04, file = "Austria/SPO_04.txt")
write(x = FPO_09, file = "Austria/FPO_09.txt")
write(x = GRUNEN_09, file = "Austria/GRUNEN_09.txt")
write(x = OVP_09, file = "Austria/OVP_09.txt")
write(x = SPO_09, file = "Austria/SPO_09.txt")

# Read every .txt file in the Austria folder and build the corpus and the
# document-feature matrix. `austria_dfm` keeps the uncleaned matrix, while
# `austria` is the working copy that gets cleaned below.
austria_texts <- readtext(file = "Austria/*.txt")
austria_corpus <- corpus(x = austria_texts)
austria_dfm <- dfm(x = austria_corpus)
is.dfm(austria_dfm)
austria <- austria_dfm

# Cleaning

# Lower-case the features, then remove - one character class at a time - all
# features matching punctuation, digits, control characters, whitespace or
# blanks.
austria <- Reduce(
  function(dfm_so_far, char_class) {
    dfm_select(
      dfm_so_far, char_class,
      selection = "remove", valuetype = "regex", verbose = TRUE
    )
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(austria)
)

# Remove the German stop words (stopwords-iso list) by exact match.
austria <- dfm_select(
  austria,
  stopwords(language = "de", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
austria_dfm_words <- as.data.frame(ntoken(austria_dfm))
austria_dfm_uniquewords <- as.data.frame(ntype(austria_dfm))

# Run Wordscores

# Austria

# Document order of the dfm; the score vectors below follow this order.
austria@Dimnames$docs

# One score vector per dimension (lr, eu, ec, so) and benchmark (bl, ches,
# emp). Every second entry is NA, i.e. that document gets no reference score.

# Benchmark "bl": scores and models
scores_austria_lr_bl <- c(17.375, NA, 5.4375, NA, 14.3125, NA, 8.75, NA)
scores_austria_eu_bl <- c(4.6875, NA, 12.0625, NA, 11.625, NA, 11.666667, NA)
scores_austria_ec_bl <- c(13.875, NA, 5.875, NA, 14.6875, NA, 7.5, NA)
scores_austria_so_bl <- c(16.75, NA, 3.875, NA, 16.3125, NA, 6.75, NA)

ws_austria_lr_bl <- textmodel_wordscores(x = austria, y = scores_austria_lr_bl)
ws_austria_eu_bl <- textmodel_wordscores(x = austria, y = scores_austria_eu_bl)
ws_austria_ec_bl <- textmodel_wordscores(x = austria, y = scores_austria_ec_bl)
ws_austria_so_bl <- textmodel_wordscores(x = austria, y = scores_austria_so_bl)

# Benchmark "ches": scores and models
scores_austria_lr_ches <- c(
  8.630000114441, NA, 2.829999923706, NA, 7, NA, 3.75, NA
)
scores_austria_eu_ches <- c(
  2.630000114441, NA, 6.130000114441, NA, 6.880000114441, NA,
  6.380000114441, NA
)
scores_austria_ec_ches <- c(
  7.3099999, NA, 2.8299999, NA, 7.5599999, NA, 3.4400001, NA
)
scores_austria_so_ches <- c(9, NA, 1.38, NA, 8.25, NA, 3.3800001, NA)

ws_austria_lr_ches <- textmodel_wordscores(
  x = austria, y = scores_austria_lr_ches
)
ws_austria_eu_ches <- textmodel_wordscores(
  x = austria, y = scores_austria_eu_ches
)
ws_austria_ec_ches <- textmodel_wordscores(
  x = austria, y = scores_austria_ec_ches
)
ws_austria_so_ches <- textmodel_wordscores(
  x = austria, y = scores_austria_so_ches
)

# Benchmark "emp": scores and models
scores_austria_lr_emp <- c(10, NA, 2, NA, 9, NA, 3, NA)
scores_austria_eu_emp <- c(1, NA, 9, NA, 9.01, NA, 4, NA)
scores_austria_ec_emp <- c(8, NA, 1, NA, 7, NA, 3, NA)
scores_austria_so_emp <- c(7, NA, 2, NA, 6, NA, 3, NA)

ws_austria_lr_emp <- textmodel_wordscores(
  x = austria, y = scores_austria_lr_emp
)
ws_austria_eu_emp <- textmodel_wordscores(
  x = austria, y = scores_austria_eu_emp
)
ws_austria_ec_emp <- textmodel_wordscores(
  x = austria, y = scores_austria_ec_emp
)
ws_austria_so_emp <- textmodel_wordscores(
  x = austria, y = scores_austria_so_emp
)


# Predict document positions from every fitted model, once with "lbg" and
# once with "mv" rescaling. Each prediction becomes a one-column data frame
# whose column is named <benchmark>_<dimension>_<rescaling>.

# LBG rescaling
austria_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_austria_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
austria_eu_bl_lbg <- setNames(
  as.data.frame(predict(ws_austria_eu_bl, rescaling = "lbg")), "bl_eu_lbg"
)
austria_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_austria_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
austria_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_austria_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

austria_lr_ches_lbg <- setNames(
  as.data.frame(predict(ws_austria_lr_ches, rescaling = "lbg")), "ches_lr_lbg"
)
austria_eu_ches_lbg <- setNames(
  as.data.frame(predict(ws_austria_eu_ches, rescaling = "lbg")), "ches_eu_lbg"
)
austria_ec_ches_lbg <- setNames(
  as.data.frame(predict(ws_austria_ec_ches, rescaling = "lbg")), "ches_ec_lbg"
)
austria_so_ches_lbg <- setNames(
  as.data.frame(predict(ws_austria_so_ches, rescaling = "lbg")), "ches_so_lbg"
)

austria_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_austria_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
austria_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_austria_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
austria_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_austria_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
austria_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_austria_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# MV rescaling
austria_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_austria_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
austria_eu_bl_mv <- setNames(
  as.data.frame(predict(ws_austria_eu_bl, rescaling = "mv")), "bl_eu_mv"
)
austria_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_austria_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
austria_so_bl_mv <- setNames(
  as.data.frame(predict(ws_austria_so_bl, rescaling = "mv")), "bl_so_mv"
)

austria_lr_ches_mv <- setNames(
  as.data.frame(predict(ws_austria_lr_ches, rescaling = "mv")), "ches_lr_mv"
)
austria_eu_ches_mv <- setNames(
  as.data.frame(predict(ws_austria_eu_ches, rescaling = "mv")), "ches_eu_mv"
)
austria_ec_ches_mv <- setNames(
  as.data.frame(predict(ws_austria_ec_ches, rescaling = "mv")), "ches_ec_mv"
)
austria_so_ches_mv <- setNames(
  as.data.frame(predict(ws_austria_so_ches, rescaling = "mv")), "ches_so_mv"
)

austria_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_austria_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
austria_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_austria_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
austria_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_austria_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
austria_so_emp_mv <- setNames(
  as.data.frame(predict(ws_austria_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Bind the 24 columns side by side and pass the result through a matrix
# before turning it back into a data frame.
austria_wordscores <- cbind(
  austria_lr_bl_lbg, austria_eu_bl_lbg,
  austria_ec_bl_lbg, austria_so_bl_lbg,
  austria_lr_ches_lbg, austria_eu_ches_lbg,
  austria_ec_ches_lbg, austria_so_ches_lbg,
  austria_lr_emp_lbg, austria_eu_emp_lbg,
  austria_ec_emp_lbg, austria_so_emp_lbg,
  austria_lr_bl_mv, austria_eu_bl_mv,
  austria_ec_bl_mv, austria_so_bl_mv,
  austria_lr_ches_mv, austria_eu_ches_mv,
  austria_ec_ches_mv, austria_so_ches_mv,
  austria_lr_emp_mv, austria_eu_emp_mv,
  austria_ec_emp_mv, austria_so_emp_mv
)
austria_wordscores <- as.data.frame(as.matrix(austria_wordscores))

# Write the table into the Wordscores folder, then return to the manifestos.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(austria_wordscores, file = "austria_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")

################################################################################## Belgium (FR) ################################################################################


# Read the text layer of every Belgium (FR) manifesto with pdf_text().
CDH_04 <- pdf_text(pdf = "Belgium(FR)/CDH_04.pdf")
ECOLO_04 <- pdf_text(pdf = "Belgium(FR)/ECOLO_04.pdf")
MR_04 <- pdf_text(pdf = "Belgium(FR)/MR_04.pdf")
PS_04 <- pdf_text(pdf = "Belgium(FR)/PS_04.pdf")
CDH_09 <- pdf_text(pdf = "Belgium(FR)/CDH_09.pdf")
ECOLO_09 <- pdf_text(pdf = "Belgium(FR)/ECOLO_09.pdf")
MR_09 <- pdf_text(pdf = "Belgium(FR)/MR_09.pdf")
PS_09 <- pdf_text(pdf = "Belgium(FR)/PS_09.pdf")

# Store every text as a .txt file inside the country folder.
write(x = CDH_04, file = "Belgium(FR)/CDH_04.txt")
write(x = ECOLO_04, file = "Belgium(FR)/ECOLO_04.txt")
write(x = MR_04, file = "Belgium(FR)/MR_04.txt")
write(x = PS_04, file = "Belgium(FR)/PS_04.txt")
write(x = CDH_09, file = "Belgium(FR)/CDH_09.txt")
write(x = ECOLO_09, file = "Belgium(FR)/ECOLO_09.txt")
write(x = MR_09, file = "Belgium(FR)/MR_09.txt")
write(x = PS_09, file = "Belgium(FR)/PS_09.txt")

# Read every .txt file in the Belgium(FR) folder and build the corpus and the
# document-feature matrix. `belgiumfr_dfm` keeps the uncleaned matrix, while
# `belgiumfr` is the working copy that gets cleaned below.
belgiumfr_texts <- readtext(file = "Belgium(FR)/*.txt")
belgiumfr_corpus <- corpus(x = belgiumfr_texts)
belgiumfr_dfm <- dfm(x = belgiumfr_corpus)
is.dfm(belgiumfr_dfm)
belgiumfr <- belgiumfr_dfm

# Cleaning

# Lower-case the features, then remove - one character class at a time - all
# features matching punctuation, digits, control characters, whitespace or
# blanks.
belgiumfr <- Reduce(
  function(dfm_so_far, char_class) {
    dfm_select(
      dfm_so_far, char_class,
      selection = "remove", valuetype = "regex", verbose = TRUE
    )
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(belgiumfr)
)

# Remove the French stop words (stopwords-iso list) by exact match.
belgiumfr <- dfm_select(
  belgiumfr,
  stopwords(language = "fr", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
belgiumfr_dfm_words <- as.data.frame(ntoken(belgiumfr_dfm))
belgiumfr_dfm_uniquewords <- as.data.frame(ntype(belgiumfr_dfm))

# Run Wordscores

# Belgium(FR)

# Document order of the dfm; the score vectors below follow this order.
belgiumfr@Dimnames$docs

# One score vector per dimension (lr, eu, ec, so) and benchmark (bl, ches,
# emp). Every second entry is NA, i.e. that document gets no reference score.

# Benchmark "bl": scores and models
scores_belgiumfr_lr_bl <- c(
  10.64999961853, NA, 3.450000047684, NA, 12.69999980927, NA,
  4.400000095367, NA
)
scores_belgiumfr_eu_bl <- c(
  14.235294, NA, 14.222222, NA, 12.764706, NA, 12.947369, NA
)
scores_belgiumfr_ec_bl <- c(
  9.3684206, NA, 4.6190476, NA, 14.25, NA, 4.9523811, NA
)
scores_belgiumfr_so_bl <- c(
  13.473684, NA, 2.8571429, NA, 7.6999998, NA, 4.7619047, NA
)

ws_belgiumfr_lr_bl <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_lr_bl
)
ws_belgiumfr_eu_bl <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_eu_bl
)
ws_belgiumfr_ec_bl <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_ec_bl
)
ws_belgiumfr_so_bl <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_so_bl
)

# Benchmark "ches": scores and models
scores_belgiumfr_lr_ches <- c(
  5.650000095367, NA, 2.55999994278, NA, 6.349999904633, NA,
  3.349999904633, NA
)
scores_belgiumfr_eu_ches <- c(
  6.300000190735, NA, 5.900000095367, NA, 6.219999790192, NA,
  6.090000152588, NA
)
scores_belgiumfr_ec_ches <- c(6, NA, 2.4000001, NA, 7.3000002, NA, 2.5, NA)
scores_belgiumfr_so_ches <- c(6.9000001, NA, 1.6, NA, 4.5999999, NA, 4, NA)

ws_belgiumfr_lr_ches <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_lr_ches
)
ws_belgiumfr_eu_ches <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_eu_ches
)
ws_belgiumfr_ec_ches <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_ec_ches
)
ws_belgiumfr_so_ches <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_so_ches
)

# Benchmark "emp": scores and models
scores_belgiumfr_lr_emp <- c(7, NA, 3, NA, 3, NA, 2, NA)
scores_belgiumfr_eu_emp <- c(9.001, NA, 9.002, NA, 6, NA, 8.999, NA)
scores_belgiumfr_ec_emp <- c(8, NA, 6, NA, 4, NA, 3, NA)
scores_belgiumfr_so_emp <- c(4.1, NA, 4.2, NA, 4, NA, 3.9, NA)

ws_belgiumfr_lr_emp <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_lr_emp
)
ws_belgiumfr_eu_emp <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_eu_emp
)
ws_belgiumfr_ec_emp <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_ec_emp
)
ws_belgiumfr_so_emp <- textmodel_wordscores(
  x = belgiumfr, y = scores_belgiumfr_so_emp
)


# Predict document positions from every fitted model, once with "lbg" and
# once with "mv" rescaling. Each prediction becomes a one-column data frame
# whose column is named <benchmark>_<dimension>_<rescaling>.

# LBG rescaling
belgiumfr_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
belgiumfr_eu_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_eu_bl, rescaling = "lbg")), "bl_eu_lbg"
)
belgiumfr_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
belgiumfr_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

belgiumfr_lr_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_lr_ches, rescaling = "lbg")),
  "ches_lr_lbg"
)
belgiumfr_eu_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_eu_ches, rescaling = "lbg")),
  "ches_eu_lbg"
)
belgiumfr_ec_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_ec_ches, rescaling = "lbg")),
  "ches_ec_lbg"
)
belgiumfr_so_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_so_ches, rescaling = "lbg")),
  "ches_so_lbg"
)

belgiumfr_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
belgiumfr_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
belgiumfr_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
belgiumfr_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumfr_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# MV rescaling
belgiumfr_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
belgiumfr_eu_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_eu_bl, rescaling = "mv")), "bl_eu_mv"
)
belgiumfr_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
belgiumfr_so_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_so_bl, rescaling = "mv")), "bl_so_mv"
)

belgiumfr_lr_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_lr_ches, rescaling = "mv")), "ches_lr_mv"
)
belgiumfr_eu_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_eu_ches, rescaling = "mv")), "ches_eu_mv"
)
belgiumfr_ec_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_ec_ches, rescaling = "mv")), "ches_ec_mv"
)
belgiumfr_so_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_so_ches, rescaling = "mv")), "ches_so_mv"
)

belgiumfr_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
belgiumfr_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
belgiumfr_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
belgiumfr_so_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumfr_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Bind the 24 columns side by side and pass the result through a matrix
# before turning it back into a data frame.
belgiumfr_wordscores <- cbind(
  belgiumfr_lr_bl_lbg, belgiumfr_eu_bl_lbg,
  belgiumfr_ec_bl_lbg, belgiumfr_so_bl_lbg,
  belgiumfr_lr_ches_lbg, belgiumfr_eu_ches_lbg,
  belgiumfr_ec_ches_lbg, belgiumfr_so_ches_lbg,
  belgiumfr_lr_emp_lbg, belgiumfr_eu_emp_lbg,
  belgiumfr_ec_emp_lbg, belgiumfr_so_emp_lbg,
  belgiumfr_lr_bl_mv, belgiumfr_eu_bl_mv,
  belgiumfr_ec_bl_mv, belgiumfr_so_bl_mv,
  belgiumfr_lr_ches_mv, belgiumfr_eu_ches_mv,
  belgiumfr_ec_ches_mv, belgiumfr_so_ches_mv,
  belgiumfr_lr_emp_mv, belgiumfr_eu_emp_mv,
  belgiumfr_ec_emp_mv, belgiumfr_so_emp_mv
)
belgiumfr_wordscores <- as.data.frame(as.matrix(belgiumfr_wordscores))

# Write the table into the Wordscores folder, then return to the manifestos.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(belgiumfr_wordscores, file = "belgiumfr_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Belgium (NL) ################################################################################

# GROEN_04 goes through OCR: pdf_convert() renders the PDF at 600 dpi and the
# result is read with the Dutch ("nld") Tesseract engine.
GROEN_04 <- ocr(
  image = pdf_convert(pdf = "Belgium(NL)/GROEN_04.pdf", dpi = 600),
  engine = "nld"
)

# The remaining 2004 manifestos are read directly with pdf_text().
CDV_04 <- pdf_text(pdf = "Belgium(NL)/CDV_04.pdf")
NVA_04 <- pdf_text(pdf = "Belgium(NL)/NVA_04.pdf")
OPENVLD_04 <- pdf_text(pdf = "Belgium(NL)/OPENVLD_04.pdf")
SPA_04 <- pdf_text(pdf = "Belgium(NL)/SPA_04.pdf")
VB_04 <- pdf_text(pdf = "Belgium(NL)/VB_04.pdf")

# 2009 manifestos
CDV_09 <- pdf_text(pdf = "Belgium(NL)/CDV_09.pdf")
GROEN_09 <- pdf_text(pdf = "Belgium(NL)/GROEN_09.pdf")
NVA_09 <- pdf_text(pdf = "Belgium(NL)/NVA_09.pdf")
OPENVLD_09 <- pdf_text(pdf = "Belgium(NL)/OPENVLD_09.pdf")
SPA_09 <- pdf_text(pdf = "Belgium(NL)/SPA_09.pdf")
VB_09 <- pdf_text(pdf = "Belgium(NL)/VB_09.pdf")

# Store every text as a .txt file inside the country folder.
write(x = GROEN_04, file = "Belgium(NL)/GROEN_04.txt")
write(x = CDV_04, file = "Belgium(NL)/CDV_04.txt")
write(x = NVA_04, file = "Belgium(NL)/NVA_04.txt")
write(x = OPENVLD_04, file = "Belgium(NL)/OPENVLD_04.txt")
write(x = SPA_04, file = "Belgium(NL)/SPA_04.txt")
write(x = VB_04, file = "Belgium(NL)/VB_04.txt")
write(x = GROEN_09, file = "Belgium(NL)/GROEN_09.txt")
write(x = CDV_09, file = "Belgium(NL)/CDV_09.txt")
write(x = NVA_09, file = "Belgium(NL)/NVA_09.txt")
write(x = OPENVLD_09, file = "Belgium(NL)/OPENVLD_09.txt")
write(x = SPA_09, file = "Belgium(NL)/SPA_09.txt")
write(x = VB_09, file = "Belgium(NL)/VB_09.txt")

# Read every .txt file in the Belgium(NL) folder and build the corpus and the
# document-feature matrix. `belgiumnl_dfm` keeps the uncleaned matrix, while
# `belgiumnl` is the working copy that gets cleaned below.
belgiumnl_texts <- readtext(file = "Belgium(NL)/*.txt")
belgiumnl_corpus <- corpus(x = belgiumnl_texts)
belgiumnl_dfm <- dfm(x = belgiumnl_corpus)
is.dfm(belgiumnl_dfm)
belgiumnl <- belgiumnl_dfm

# Cleaning

# Lower-case the features, then remove - one character class at a time - all
# features matching punctuation, digits, control characters, whitespace or
# blanks.
belgiumnl <- Reduce(
  function(dfm_so_far, char_class) {
    dfm_select(
      dfm_so_far, char_class,
      selection = "remove", valuetype = "regex", verbose = TRUE
    )
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(belgiumnl)
)

# Remove the Dutch stop words (stopwords-iso list) by exact match.
belgiumnl <- dfm_select(
  belgiumnl,
  stopwords(language = "nl", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
belgiumnl_dfm_words <- as.data.frame(ntoken(belgiumnl_dfm))
belgiumnl_dfm_uniquewords <- as.data.frame(ntype(belgiumnl_dfm))

# Run Wordscores

# Belgium(NL)

# Document order of the dfm; the score vectors below follow this order.
belgiumnl@Dimnames$docs

# One score vector per dimension (lr, eu, ec, so) and benchmark (bl, ches,
# emp). Every second entry is NA, i.e. that document gets no reference score.

# Benchmark "bl": scores and models
scores_belgiumnl_lr_bl <- c(
  12.31818199158, NA, 3.5, NA, 14.38888931274, NA,
  14.5, NA, 6.636363506317, NA, 18.86363601685, NA
)
scores_belgiumnl_eu_bl <- c(
  14.31579, NA, 13.85, NA, 11.142858, NA,
  12.9, NA, 12.590909, NA, 5.1666698, NA
)
scores_belgiumnl_ec_bl <- c(
  10.913043, NA, 4.478261, NA, 11.875, NA,
  16.391304, NA, 7.304348, NA, 14.3, NA
)
scores_belgiumnl_so_bl <- c(
  14.695652, NA, 2.2608695, NA, 12.4, NA,
  7.0434785, NA, 4.652174, NA, 19, NA
)

ws_belgiumnl_lr_bl <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_lr_bl
)
ws_belgiumnl_eu_bl <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_eu_bl
)
ws_belgiumnl_ec_bl <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_ec_bl
)
ws_belgiumnl_so_bl <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_so_bl
)

# Benchmark "ches": scores and models
scores_belgiumnl_lr_ches <- c(
  5.949999809265, NA, 2.640000104904, NA, 6.219999790192, NA,
  6.230000019073, NA, 3.5, NA, 9.550000190735, NA
)
scores_belgiumnl_eu_ches <- c(
  6.639999866486, NA, 5.949999809265, NA, 5, NA,
  6.539999961853, NA, 5.449999809265, NA, 2.730000019073, NA
)
scores_belgiumnl_ec_ches <- c(
  5.8200002, NA, 2.45, NA, 5.6700001, NA,
  7.4499998, NA, 3, NA, 7.9000001, NA
)
scores_belgiumnl_so_ches <- c(
  7, NA, 1.55, NA, 5.6700001, NA,
  3.55, NA, 3.27, NA, 9.3699999, NA
)

ws_belgiumnl_lr_ches <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_lr_ches
)
ws_belgiumnl_eu_ches <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_eu_ches
)
ws_belgiumnl_ec_ches <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_ec_ches
)
ws_belgiumnl_so_ches <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_so_ches
)

# Benchmark "emp": scores and models
scores_belgiumnl_lr_emp <- c(
  6, NA, 3.01, NA, 4, NA, 8.01, NA, 2.99, NA, 8, NA
)
scores_belgiumnl_eu_emp <- c(9, NA, 9.01, NA, 7, NA, 8, NA, 8, NA, 3, NA)
scores_belgiumnl_ec_emp <- c(7, NA, 6, NA, 5, NA, 9, NA, 3, NA, 6, NA)
scores_belgiumnl_so_emp <- c(4, NA, 4, NA, 3, NA, 2, NA, 5.01, NA, 5, NA)

ws_belgiumnl_lr_emp <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_lr_emp
)
ws_belgiumnl_eu_emp <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_eu_emp
)
ws_belgiumnl_ec_emp <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_ec_emp
)
ws_belgiumnl_so_emp <- textmodel_wordscores(
  x = belgiumnl, y = scores_belgiumnl_so_emp
)


# Predict document positions from every fitted model, once with "lbg" and
# once with "mv" rescaling. Each prediction becomes a one-column data frame
# whose column is named <benchmark>_<dimension>_<rescaling>.

# LBG rescaling
belgiumnl_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
belgiumnl_eu_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_eu_bl, rescaling = "lbg")), "bl_eu_lbg"
)
belgiumnl_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
belgiumnl_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

belgiumnl_lr_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_lr_ches, rescaling = "lbg")),
  "ches_lr_lbg"
)
belgiumnl_eu_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_eu_ches, rescaling = "lbg")),
  "ches_eu_lbg"
)
belgiumnl_ec_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_ec_ches, rescaling = "lbg")),
  "ches_ec_lbg"
)
belgiumnl_so_ches_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_so_ches, rescaling = "lbg")),
  "ches_so_lbg"
)

belgiumnl_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
belgiumnl_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
belgiumnl_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
belgiumnl_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_belgiumnl_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# MV rescaling
belgiumnl_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
belgiumnl_eu_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_eu_bl, rescaling = "mv")), "bl_eu_mv"
)
belgiumnl_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
belgiumnl_so_bl_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_so_bl, rescaling = "mv")), "bl_so_mv"
)

belgiumnl_lr_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_lr_ches, rescaling = "mv")), "ches_lr_mv"
)
belgiumnl_eu_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_eu_ches, rescaling = "mv")), "ches_eu_mv"
)
belgiumnl_ec_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_ec_ches, rescaling = "mv")), "ches_ec_mv"
)
belgiumnl_so_ches_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_so_ches, rescaling = "mv")), "ches_so_mv"
)

belgiumnl_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
belgiumnl_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
belgiumnl_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
belgiumnl_so_emp_mv <- setNames(
  as.data.frame(predict(ws_belgiumnl_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Bind the 24 columns side by side and pass the result through a matrix
# before turning it back into a data frame.
belgiumnl_wordscores <- cbind(
  belgiumnl_lr_bl_lbg, belgiumnl_eu_bl_lbg,
  belgiumnl_ec_bl_lbg, belgiumnl_so_bl_lbg,
  belgiumnl_lr_ches_lbg, belgiumnl_eu_ches_lbg,
  belgiumnl_ec_ches_lbg, belgiumnl_so_ches_lbg,
  belgiumnl_lr_emp_lbg, belgiumnl_eu_emp_lbg,
  belgiumnl_ec_emp_lbg, belgiumnl_so_emp_lbg,
  belgiumnl_lr_bl_mv, belgiumnl_eu_bl_mv,
  belgiumnl_ec_bl_mv, belgiumnl_so_bl_mv,
  belgiumnl_lr_ches_mv, belgiumnl_eu_ches_mv,
  belgiumnl_ec_ches_mv, belgiumnl_so_ches_mv,
  belgiumnl_lr_emp_mv, belgiumnl_eu_emp_mv,
  belgiumnl_ec_emp_mv, belgiumnl_so_emp_mv
)
belgiumnl_wordscores <- as.data.frame(as.matrix(belgiumnl_wordscores))

# Write the table into the Wordscores folder, then return to the manifestos.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(belgiumnl_wordscores, file = "belgiumnl_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Cyprus ################################################################################

# All four 2004 manifestos go through OCR with the "Greek" Tesseract engine.
# pdf_convert() uses its default resolution except for DISY_04 (dpi = 50).
AKEL_04 <- ocr(
  image = pdf_convert(pdf = "Cyprus/AKEL_04.pdf"),
  engine = "Greek"
)
DIKO_04 <- ocr(
  image = pdf_convert(pdf = "Cyprus/DIKO_04.pdf"),
  engine = "Greek"
)
DISY_04 <- ocr(
  image = pdf_convert(pdf = "Cyprus/DISY_04.pdf", dpi = 50),
  engine = "Greek"
)
EDEK_04 <- ocr(
  image = pdf_convert(pdf = "Cyprus/EDEK_04.pdf"),
  engine = "Greek"
)

# The 2009 manifestos are read directly with pdf_text().
AKEL_09 <- pdf_text(pdf = "Cyprus/AKEL_09.pdf")
DIKO_09 <- pdf_text(pdf = "Cyprus/DIKO_09.pdf")
DISY_09 <- pdf_text(pdf = "Cyprus/DISY_09.pdf")
EDEK_09 <- pdf_text(pdf = "Cyprus/EDEK_09.pdf")

# Store every text as a .txt file inside the country folder.
write(x = AKEL_04, file = "Cyprus/AKEL_04.txt")
write(x = DIKO_04, file = "Cyprus/DIKO_04.txt")
write(x = DISY_04, file = "Cyprus/DISY_04.txt")
write(x = EDEK_04, file = "Cyprus/EDEK_04.txt")
write(x = AKEL_09, file = "Cyprus/AKEL_09.txt")
write(x = DIKO_09, file = "Cyprus/DIKO_09.txt")
write(x = DISY_09, file = "Cyprus/DISY_09.txt")
write(x = EDEK_09, file = "Cyprus/EDEK_09.txt")

# Read every .txt file in the Cyprus folder and build the corpus and the
# document-feature matrix. `cyprus_dfm` keeps the uncleaned matrix, while
# `cyprus` is the working copy that gets cleaned below.
cyprus_texts <- readtext(file = "Cyprus/*.txt")
cyprus_corpus <- corpus(x = cyprus_texts)
cyprus_dfm <- dfm(x = cyprus_corpus)
is.dfm(cyprus_dfm)
cyprus <- cyprus_dfm

# Cleaning

# Lower-case the features, then remove - one character class at a time - all
# features matching punctuation, digits, control characters, whitespace or
# blanks.
cyprus <- Reduce(
  function(dfm_so_far, char_class) {
    dfm_select(
      dfm_so_far, char_class,
      selection = "remove", valuetype = "regex", verbose = TRUE
    )
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(cyprus)
)

# Remove the Greek stop words (stopwords-iso list) by exact match.
cyprus <- dfm_select(
  cyprus,
  stopwords(language = "el", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
cyprus_dfm_words <- as.data.frame(ntoken(cyprus_dfm))
cyprus_dfm_uniquewords <- as.data.frame(ntype(cyprus_dfm))


# Run Wordscores

# Cyprus

# Document order of the dfm; the score vectors below follow this order.
cyprus@Dimnames$docs

# Score vectors per dimension and benchmark. Every second entry is NA, i.e.
# that document gets no reference score. For Cyprus there are no "ches"
# scores and no "bl" score for the eu dimension.

# Benchmark "bl": scores and models (lr, ec, so only)
scores_cyprus_lr_bl <- c(3, NA, 10.75, NA, 17.5, NA, 7.25, NA)
scores_cyprus_ec_bl <- c(6, NA, 8.75, NA, 15, NA, 10, NA)
scores_cyprus_so_bl <- c(12.75, NA, 12.5, NA, 10.5, NA, 12.25, NA)

ws_cyprus_lr_bl <- textmodel_wordscores(x = cyprus, y = scores_cyprus_lr_bl)
ws_cyprus_ec_bl <- textmodel_wordscores(x = cyprus, y = scores_cyprus_ec_bl)
ws_cyprus_so_bl <- textmodel_wordscores(x = cyprus, y = scores_cyprus_so_bl)

# Benchmark "emp": scores and models
scores_cyprus_lr_emp <- c(2.001, NA, 7.0001, NA, 7.001, NA, 3, NA)
scores_cyprus_eu_emp <- c(7.001, NA, 8, NA, 9, NA, 8, NA)
scores_cyprus_ec_emp <- c(2, NA, 7, NA, 6, NA, 4, NA)
scores_cyprus_so_emp <- c(1.00001, NA, 1.99999, NA, 2, NA, 2.00001, NA)

ws_cyprus_lr_emp <- textmodel_wordscores(x = cyprus, y = scores_cyprus_lr_emp)
ws_cyprus_eu_emp <- textmodel_wordscores(x = cyprus, y = scores_cyprus_eu_emp)
ws_cyprus_ec_emp <- textmodel_wordscores(x = cyprus, y = scores_cyprus_ec_emp)
ws_cyprus_so_emp <- textmodel_wordscores(x = cyprus, y = scores_cyprus_so_emp)


# Predict document positions from every fitted model, once with "lbg" and
# once with "mv" rescaling. Each prediction becomes a one-column data frame
# whose column is named <benchmark>_<dimension>_<rescaling>. Combinations
# without a fitted model (bl/eu and all of ches) get a column of eight NAs so
# that the output keeps the same 24 columns as the other countries.

# LBG rescaling
cyprus_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
cyprus_eu_bl_lbg <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "bl_eu_lbg"
)
cyprus_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
cyprus_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

cyprus_lr_ches_lbg <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_lr_lbg"
)
cyprus_eu_ches_lbg <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_eu_lbg"
)
cyprus_ec_ches_lbg <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_ec_lbg"
)
cyprus_so_ches_lbg <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_so_lbg"
)

cyprus_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
cyprus_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
cyprus_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
cyprus_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_cyprus_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# MV rescaling
cyprus_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_cyprus_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
cyprus_eu_bl_mv <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "bl_eu_mv"
)
cyprus_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_cyprus_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
cyprus_so_bl_mv <- setNames(
  as.data.frame(predict(ws_cyprus_so_bl, rescaling = "mv")), "bl_so_mv"
)

cyprus_lr_ches_mv <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_lr_mv"
)
cyprus_eu_ches_mv <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_eu_mv"
)
cyprus_ec_ches_mv <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_ec_mv"
)
cyprus_so_ches_mv <- setNames(
  data.frame(y = rep(NA, 8), stringsAsFactors = FALSE), "ches_so_mv"
)

cyprus_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_cyprus_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
cyprus_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_cyprus_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
cyprus_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_cyprus_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
cyprus_so_emp_mv <- setNames(
  as.data.frame(predict(ws_cyprus_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Bind the 24 columns side by side and pass the result through a matrix
# before turning it back into a data frame.
cyprus_wordscores <- cbind(
  cyprus_lr_bl_lbg, cyprus_eu_bl_lbg,
  cyprus_ec_bl_lbg, cyprus_so_bl_lbg,
  cyprus_lr_ches_lbg, cyprus_eu_ches_lbg,
  cyprus_ec_ches_lbg, cyprus_so_ches_lbg,
  cyprus_lr_emp_lbg, cyprus_eu_emp_lbg,
  cyprus_ec_emp_lbg, cyprus_so_emp_lbg,
  cyprus_lr_bl_mv, cyprus_eu_bl_mv,
  cyprus_ec_bl_mv, cyprus_so_bl_mv,
  cyprus_lr_ches_mv, cyprus_eu_ches_mv,
  cyprus_ec_ches_mv, cyprus_so_ches_mv,
  cyprus_lr_emp_mv, cyprus_eu_emp_mv,
  cyprus_ec_emp_mv, cyprus_so_emp_mv
)
cyprus_wordscores <- as.data.frame(as.matrix(cyprus_wordscores))

# Write the table into the Wordscores folder, then return to the manifestos.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(cyprus_wordscores, file = "cyprus_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Czech ################################################################################

# Extract the text of every Czech manifesto and store it as plain text next to
# the PDF. KSCM_04 is rendered to page images and read with Tesseract OCR
# (Czech language data, "ces"); all other PDFs are read with pdf_text().
CSSD_04 <- pdf_text(pdf = "Czech/CSSD_04.pdf")
write(x = CSSD_04, file = "Czech/CSSD_04.txt")
CSSD_09 <- pdf_text(pdf = "Czech/CSSD_09.pdf")
write(x = CSSD_09, file = "Czech/CSSD_09.txt")

KDUCSL_04 <- pdf_text(pdf = "Czech/KDUCSL_04.pdf")
write(x = KDUCSL_04, file = "Czech/KDUCSL_04.txt")
KDUCSL_09 <- pdf_text(pdf = "Czech/KDUCSL_09.pdf")
write(x = KDUCSL_09, file = "Czech/KDUCSL_09.txt")

KSCM_04 <- ocr(pdf_convert(pdf = "Czech/KSCM_04.pdf"), engine = "ces")
write(x = KSCM_04, file = "Czech/KSCM_04.txt")
KSCM_09 <- pdf_text(pdf = "Czech/KSCM_09.pdf")
write(x = KSCM_09, file = "Czech/KSCM_09.txt")

ODS_04 <- pdf_text(pdf = "Czech/ODS_04.pdf")
write(x = ODS_04, file = "Czech/ODS_04.txt")
ODS_09 <- pdf_text(pdf = "Czech/ODS_09.pdf")
write(x = ODS_09, file = "Czech/ODS_09.txt")

# Read all text files of the folder back in and build the document-feature
# matrix. `czech_dfm` stays untouched; `czech` is the copy that gets cleaned.
czech_texts <- readtext(file = "Czech/*.txt")
czech_corpus <- corpus(czech_texts)
czech_dfm <- dfm(czech_corpus)
is.dfm(czech_dfm)

# Cleaning

czech <- dfm_tolower(czech_dfm)
# Drop every feature that matches one of these character classes, one class
# at a time and in this order.
czech <- Reduce(
  function(current_dfm, char_class) {
    dfm_select(current_dfm, pattern = char_class, selection = "remove",
               valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  czech
)
# Drop Czech stopwords (exact matches against the stopwords-iso list).
czech <- dfm_select(
  czech,
  pattern = stopwords(language = "cs", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Tokens and types per document. These are taken from `czech_dfm`, i.e. the
# matrix as it was before the cleaning steps above.
czech_dfm_words <- as.data.frame(ntoken(czech_dfm))
czech_dfm_uniquewords <- as.data.frame(ntype(czech_dfm))

# Run Wordscores

# Czech

# Show the document order; the reference-score vectors are positional and
# have to line up with it.
docnames(czech)
# Reference scores for the Czech models: one entry per document of `czech`, in
# document order. NA means the document has no reference score on that
# dimension. There is no bl vector for the eu dimension.

# bl benchmark
scores_czech_lr_bl <- c(7.444444656372, NA, 11.13888931274, NA,
                        2.638888835907, NA, 15.5, NA)
scores_czech_ec_bl <- c(6.1666665, NA, 9.75, NA, 4.4444447, NA, 16.305555, NA)
scores_czech_so_bl <- c(7.0588236, NA, 18.111111, NA, 8.1999998, NA, 9.333333, NA)

# ches benchmark
scores_czech_lr_ches <- c(3.329999923706, NA, 4.860000133514, NA,
                          0.910000026226, NA, 7.079999923706, NA)
scores_czech_eu_ches <- c(6.45, NA, 6.44000005722, NA,
                          2.72000002861, NA, 3.77999997139, NA)
scores_czech_ec_ches <- c(3, NA, 7.9200001, NA, 0.86000001, NA, 7.6399999, NA)
scores_czech_so_ches <- c(4.6900001, NA, 7.5599999, NA, 7.8200002, NA, 3.8900001, NA)

# emp benchmark
scores_czech_lr_emp <- c(4, NA, 8, NA, 3, NA, 10, NA)
scores_czech_eu_emp <- c(10, NA, 8, NA, 5, NA, 2, NA)
scores_czech_ec_emp <- c(6, NA, 8, NA, 3, NA, 10, NA)
scores_czech_so_emp <- c(3, NA, 8, NA, 10, NA, NA, NA)

# Fit one Wordscores model per reference-score vector on the cleaned Czech dfm.
# There is no bl/eu model because no `scores_czech_eu_bl` is defined.
ws_czech_lr_bl <- textmodel_wordscores(x = czech, y = scores_czech_lr_bl)
ws_czech_ec_bl <- textmodel_wordscores(x = czech, y = scores_czech_ec_bl)
ws_czech_so_bl <- textmodel_wordscores(x = czech, y = scores_czech_so_bl)

ws_czech_lr_ches <- textmodel_wordscores(x = czech, y = scores_czech_lr_ches)
ws_czech_eu_ches <- textmodel_wordscores(x = czech, y = scores_czech_eu_ches)
ws_czech_ec_ches <- textmodel_wordscores(x = czech, y = scores_czech_ec_ches)
ws_czech_so_ches <- textmodel_wordscores(x = czech, y = scores_czech_so_ches)

ws_czech_lr_emp <- textmodel_wordscores(x = czech, y = scores_czech_lr_emp)
ws_czech_eu_emp <- textmodel_wordscores(x = czech, y = scores_czech_eu_emp)
ws_czech_ec_emp <- textmodel_wordscores(x = czech, y = scores_czech_ec_emp)
ws_czech_so_emp <- textmodel_wordscores(x = czech, y = scores_czech_so_emp)

# Predicted document scores: one single-column data frame per
# dimension (lr, eu, ec, so) x benchmark (bl, ches, emp) x rescaling (lbg, mv).
# The column is named "<benchmark>_<dimension>_<rescaling>" at construction.
# The missing bl/eu combination gets an all-NA column whose length follows the
# number of documents, so every country file has the same 24 columns.

# --- LBG rescaling ---
czech_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_czech_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
czech_eu_bl_lbg <- data.frame(bl_eu_lbg = rep(NA, ndoc(czech)))
czech_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_czech_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
czech_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_czech_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

czech_lr_ches_lbg <- setNames(
  as.data.frame(predict(ws_czech_lr_ches, rescaling = "lbg")), "ches_lr_lbg"
)
czech_eu_ches_lbg <- setNames(
  as.data.frame(predict(ws_czech_eu_ches, rescaling = "lbg")), "ches_eu_lbg"
)
czech_ec_ches_lbg <- setNames(
  as.data.frame(predict(ws_czech_ec_ches, rescaling = "lbg")), "ches_ec_lbg"
)
czech_so_ches_lbg <- setNames(
  as.data.frame(predict(ws_czech_so_ches, rescaling = "lbg")), "ches_so_lbg"
)

czech_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_czech_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
czech_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_czech_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
czech_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_czech_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
czech_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_czech_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# --- MV rescaling ---
czech_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_czech_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
czech_eu_bl_mv <- data.frame(bl_eu_mv = rep(NA, ndoc(czech)))
czech_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_czech_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
czech_so_bl_mv <- setNames(
  as.data.frame(predict(ws_czech_so_bl, rescaling = "mv")), "bl_so_mv"
)

czech_lr_ches_mv <- setNames(
  as.data.frame(predict(ws_czech_lr_ches, rescaling = "mv")), "ches_lr_mv"
)
czech_eu_ches_mv <- setNames(
  as.data.frame(predict(ws_czech_eu_ches, rescaling = "mv")), "ches_eu_mv"
)
czech_ec_ches_mv <- setNames(
  as.data.frame(predict(ws_czech_ec_ches, rescaling = "mv")), "ches_ec_mv"
)
czech_so_ches_mv <- setNames(
  as.data.frame(predict(ws_czech_so_ches, rescaling = "mv")), "ches_so_mv"
)

czech_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_czech_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
czech_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_czech_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
czech_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_czech_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
czech_so_emp_mv <- setNames(
  as.data.frame(predict(ws_czech_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Assemble the 24 columns in the fixed order shared by all country files.
czech_wordscores <- cbind(
  czech_lr_bl_lbg, czech_eu_bl_lbg, czech_ec_bl_lbg, czech_so_bl_lbg,
  czech_lr_ches_lbg, czech_eu_ches_lbg, czech_ec_ches_lbg, czech_so_ches_lbg,
  czech_lr_emp_lbg, czech_eu_emp_lbg, czech_ec_emp_lbg, czech_so_emp_lbg,
  czech_lr_bl_mv, czech_eu_bl_mv, czech_ec_bl_mv, czech_so_bl_mv,
  czech_lr_ches_mv, czech_eu_ches_mv, czech_ec_ches_mv, czech_so_ches_mv,
  czech_lr_emp_mv, czech_eu_emp_mv, czech_ec_emp_mv, czech_so_emp_mv
)
# Round-trip through a matrix so all columns share one type (the all-NA
# placeholder columns are logical before this step).
czech_wordscores <- as.data.frame(as.matrix(czech_wordscores))

# Write straight into the Wordscores folder instead of changing into it: a
# failed write then cannot leave the session in the wrong working directory.
write.csv(
  czech_wordscores,
  file = "~/Downloads/Replication Files/Wordscores/czech_wordscores.csv"
)
# Kept from the original: the sections that follow use paths relative to the
# Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Denmark ################################################################################

# Extract the text of every Danish manifesto and store it as plain text next to
# the PDF. C_09 is rendered to page images and read with Tesseract OCR (Danish
# language data, "dan"); all other PDFs are read with pdf_text().
A_04 <- pdf_text(pdf = "Denmark/A_04.pdf")
write(x = A_04, file = "Denmark/A_04.txt")
A_09 <- pdf_text(pdf = "Denmark/A_09.pdf")
write(x = A_09, file = "Denmark/A_09.txt")

B_04 <- pdf_text(pdf = "Denmark/B_04.pdf")
write(x = B_04, file = "Denmark/B_04.txt")
B_09 <- pdf_text(pdf = "Denmark/B_09.pdf")
write(x = B_09, file = "Denmark/B_09.txt")

C_04 <- pdf_text(pdf = "Denmark/C_04.pdf")
write(x = C_04, file = "Denmark/C_04.txt")
C_09 <- ocr(pdf_convert(pdf = "Denmark/C_09.pdf"), engine = "dan")
write(x = C_09, file = "Denmark/C_09.txt")

F_04 <- pdf_text(pdf = "Denmark/F_04.pdf")
write(x = F_04, file = "Denmark/F_04.txt")
F_09 <- pdf_text(pdf = "Denmark/F_09.pdf")
write(x = F_09, file = "Denmark/F_09.txt")

O_04 <- pdf_text(pdf = "Denmark/O_04.pdf")
write(x = O_04, file = "Denmark/O_04.txt")
O_09 <- pdf_text(pdf = "Denmark/O_09.pdf")
write(x = O_09, file = "Denmark/O_09.txt")

V_04 <- pdf_text(pdf = "Denmark/V_04.pdf")
write(x = V_04, file = "Denmark/V_04.txt")
V_09 <- pdf_text(pdf = "Denmark/V_09.pdf")
write(x = V_09, file = "Denmark/V_09.txt")

# Read all text files of the folder back in and build the document-feature
# matrix. `denmark_dfm` stays untouched; `denmark` is the copy that gets cleaned.
denmark_texts <- readtext(file = "Denmark/*.txt")
denmark_corpus <- corpus(denmark_texts)
denmark_dfm <- dfm(denmark_corpus)
is.dfm(denmark_dfm)

# Cleaning

denmark <- dfm_tolower(denmark_dfm)
# Drop every feature that matches one of these character classes, one class
# at a time and in this order.
denmark <- Reduce(
  function(current_dfm, char_class) {
    dfm_select(current_dfm, pattern = char_class, selection = "remove",
               valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  denmark
)
# Drop Danish stopwords (exact matches against the stopwords-iso list).
denmark <- dfm_select(
  denmark,
  pattern = stopwords(language = "da", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Tokens and types per document. These are taken from `denmark_dfm`, i.e. the
# matrix as it was before the cleaning steps above.
denmark_dfm_words <- as.data.frame(ntoken(denmark_dfm))
denmark_dfm_uniquewords <- as.data.frame(ntype(denmark_dfm))


# Run Wordscores

# Denmark

# Show the document order; the reference-score vectors are positional and
# have to line up with it.
docnames(denmark)
# Reference scores for the Danish models: one entry per document of `denmark`,
# in document order. NA means the document has no reference score.

# bl benchmark
scores_denmark_lr_bl <- c(7.576922893524, NA, 9.346154212952, NA,
                          15.19230747223, NA, 4.576922893524, NA,
                          15.30769252777, NA, 15.07692337036, NA)
scores_denmark_eu_bl <- c(12.75, NA, 14.416667, NA, 13.5, NA,
                          6.4583302, NA, 2.4583299, NA, 15.125, NA)
scores_denmark_ec_bl <- c(7.4000001, NA, 10.36, NA, 15.32, NA,
                          4.8400002, NA, 9.9615383, NA, 14.84, NA)
# NOTE(review): the so vector below is value-for-value identical to the ec
# vector above, which looks like a copy-paste slip - verify against
# benchmarks.csv before relying on the bl/so results for Denmark.
scores_denmark_so_bl <- c(7.4000001, NA, 10.36, NA, 15.32, NA,
                          4.8400002, NA, 9.9615383, NA, 14.84, NA)

# ches benchmark
scores_denmark_lr_ches <- c(4, NA, 5.079999923706, NA, 7, NA,
                            2.30999994278, NA, 8.85000038147, NA,
                            7.380000114441, NA)
scores_denmark_eu_ches <- c(5.769999980927, NA, 5.769999980927, NA,
                            5.539999961853, NA, 2.920000076294, NA,
                            1.620000004768, NA, 6.619999885559, NA)
scores_denmark_ec_ches <- c(3.6900001, NA, 5.3099999, NA, 6.8499999, NA,
                            2, NA, 6.0799999, NA, 7.8499999, NA)
scores_denmark_so_ches <- c(4.3800001, NA, 2.54, NA, 7.23, NA,
                            2.1500001, NA, 8.9200001, NA, 4.8499999, NA)

# emp benchmark
scores_denmark_lr_emp <- c(3, NA, 4, NA, 8, NA, 2, NA, 9, NA, 8, NA)
scores_denmark_eu_emp <- c(9, NA, 8, NA, 8.99, NA, 8, NA, 2, NA, 9.001, NA)
scores_denmark_ec_emp <- c(2.01, NA, 3, NA, 7, NA, 2, NA, 8, NA, 4, NA)
scores_denmark_so_emp <- c(2.999, NA, 3.003, NA, 3.001, NA, 3.002, NA, 8, NA, 2.998, NA)

# Fit one Wordscores model per reference-score vector on the cleaned Danish dfm.
ws_denmark_lr_bl <- textmodel_wordscores(x = denmark, y = scores_denmark_lr_bl)
ws_denmark_eu_bl <- textmodel_wordscores(x = denmark, y = scores_denmark_eu_bl)
ws_denmark_ec_bl <- textmodel_wordscores(x = denmark, y = scores_denmark_ec_bl)
ws_denmark_so_bl <- textmodel_wordscores(x = denmark, y = scores_denmark_so_bl)

ws_denmark_lr_ches <- textmodel_wordscores(x = denmark, y = scores_denmark_lr_ches)
ws_denmark_eu_ches <- textmodel_wordscores(x = denmark, y = scores_denmark_eu_ches)
ws_denmark_ec_ches <- textmodel_wordscores(x = denmark, y = scores_denmark_ec_ches)
ws_denmark_so_ches <- textmodel_wordscores(x = denmark, y = scores_denmark_so_ches)

ws_denmark_lr_emp <- textmodel_wordscores(x = denmark, y = scores_denmark_lr_emp)
ws_denmark_eu_emp <- textmodel_wordscores(x = denmark, y = scores_denmark_eu_emp)
ws_denmark_ec_emp <- textmodel_wordscores(x = denmark, y = scores_denmark_ec_emp)
ws_denmark_so_emp <- textmodel_wordscores(x = denmark, y = scores_denmark_so_emp)

# Predicted document scores: one single-column data frame per
# dimension (lr, eu, ec, so) x benchmark (bl, ches, emp) x rescaling (lbg, mv).
# The column is named "<benchmark>_<dimension>_<rescaling>" at construction.

# --- LBG rescaling ---
denmark_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_denmark_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
denmark_eu_bl_lbg <- setNames(
  as.data.frame(predict(ws_denmark_eu_bl, rescaling = "lbg")), "bl_eu_lbg"
)
denmark_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_denmark_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
denmark_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_denmark_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

denmark_lr_ches_lbg <- setNames(
  as.data.frame(predict(ws_denmark_lr_ches, rescaling = "lbg")), "ches_lr_lbg"
)
denmark_eu_ches_lbg <- setNames(
  as.data.frame(predict(ws_denmark_eu_ches, rescaling = "lbg")), "ches_eu_lbg"
)
denmark_ec_ches_lbg <- setNames(
  as.data.frame(predict(ws_denmark_ec_ches, rescaling = "lbg")), "ches_ec_lbg"
)
denmark_so_ches_lbg <- setNames(
  as.data.frame(predict(ws_denmark_so_ches, rescaling = "lbg")), "ches_so_lbg"
)

denmark_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_denmark_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
denmark_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_denmark_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
denmark_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_denmark_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
denmark_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_denmark_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# --- MV rescaling ---
denmark_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_denmark_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
denmark_eu_bl_mv <- setNames(
  as.data.frame(predict(ws_denmark_eu_bl, rescaling = "mv")), "bl_eu_mv"
)
denmark_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_denmark_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
denmark_so_bl_mv <- setNames(
  as.data.frame(predict(ws_denmark_so_bl, rescaling = "mv")), "bl_so_mv"
)

denmark_lr_ches_mv <- setNames(
  as.data.frame(predict(ws_denmark_lr_ches, rescaling = "mv")), "ches_lr_mv"
)
denmark_eu_ches_mv <- setNames(
  as.data.frame(predict(ws_denmark_eu_ches, rescaling = "mv")), "ches_eu_mv"
)
denmark_ec_ches_mv <- setNames(
  as.data.frame(predict(ws_denmark_ec_ches, rescaling = "mv")), "ches_ec_mv"
)
denmark_so_ches_mv <- setNames(
  as.data.frame(predict(ws_denmark_so_ches, rescaling = "mv")), "ches_so_mv"
)

denmark_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_denmark_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
denmark_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_denmark_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
denmark_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_denmark_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
denmark_so_emp_mv <- setNames(
  as.data.frame(predict(ws_denmark_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Assemble the 24 columns in the fixed order shared by all country files.
denmark_wordscores <- cbind(
  denmark_lr_bl_lbg, denmark_eu_bl_lbg, denmark_ec_bl_lbg, denmark_so_bl_lbg,
  denmark_lr_ches_lbg, denmark_eu_ches_lbg, denmark_ec_ches_lbg, denmark_so_ches_lbg,
  denmark_lr_emp_lbg, denmark_eu_emp_lbg, denmark_ec_emp_lbg, denmark_so_emp_lbg,
  denmark_lr_bl_mv, denmark_eu_bl_mv, denmark_ec_bl_mv, denmark_so_bl_mv,
  denmark_lr_ches_mv, denmark_eu_ches_mv, denmark_ec_ches_mv, denmark_so_ches_mv,
  denmark_lr_emp_mv, denmark_eu_emp_mv, denmark_ec_emp_mv, denmark_so_emp_mv
)
# Round-trip through a matrix, as in the other country sections, so the
# exported object is a plain data frame built from one matrix.
denmark_wordscores <- as.data.frame(as.matrix(denmark_wordscores))

# Write straight into the Wordscores folder instead of changing into it: a
# failed write then cannot leave the session in the wrong working directory.
write.csv(
  denmark_wordscores,
  file = "~/Downloads/Replication Files/Wordscores/denmark_wordscores.csv"
)
# Kept from the original: the sections that follow use paths relative to the
# Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Estonia ################################################################################

# Extract the text of every Estonian manifesto with pdf_text() and store it as
# plain text next to the PDF.
ER_04 <- pdf_text(pdf = "Estonia/ER_04.pdf")
write(x = ER_04, file = "Estonia/ER_04.txt")
ER_09 <- pdf_text(pdf = "Estonia/ER_09.pdf")
write(x = ER_09, file = "Estonia/ER_09.txt")

KESK_04 <- pdf_text(pdf = "Estonia/KESK_04.pdf")
write(x = KESK_04, file = "Estonia/KESK_04.txt")
KESK_09 <- pdf_text(pdf = "Estonia/KESK_09.pdf")
write(x = KESK_09, file = "Estonia/KESK_09.txt")

RESP_04 <- pdf_text(pdf = "Estonia/RESP_04.pdf")
write(x = RESP_04, file = "Estonia/RESP_04.txt")
RESP_09 <- pdf_text(pdf = "Estonia/RESP_09.pdf")
write(x = RESP_09, file = "Estonia/RESP_09.txt")

SDE_04 <- pdf_text(pdf = "Estonia/SDE_04.pdf")
write(x = SDE_04, file = "Estonia/SDE_04.txt")
SDE_09 <- pdf_text(pdf = "Estonia/SDE_09.pdf")
write(x = SDE_09, file = "Estonia/SDE_09.txt")

# Read all text files of the folder back in and build the document-feature
# matrix. `estonia_dfm` stays untouched; `estonia` is the copy that gets cleaned.
estonia_texts <- readtext(file = "Estonia/*.txt")
estonia_corpus <- corpus(estonia_texts)
estonia_dfm <- dfm(estonia_corpus)
is.dfm(estonia_dfm)

# Cleaning

estonia <- dfm_tolower(estonia_dfm)
# Drop every feature that matches one of these character classes, one class
# at a time and in this order.
estonia <- Reduce(
  function(current_dfm, char_class) {
    dfm_select(current_dfm, pattern = char_class, selection = "remove",
               valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  estonia
)
# Drop Estonian stopwords (exact matches against the stopwords-iso list).
estonia <- dfm_select(
  estonia,
  pattern = stopwords(language = "et", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Tokens and types per document. These are taken from `estonia_dfm`, i.e. the
# matrix as it was before the cleaning steps above.
estonia_dfm_words <- as.data.frame(ntoken(estonia_dfm))
estonia_dfm_uniquewords <- as.data.frame(ntype(estonia_dfm))

# Run Wordscores

# Estonia

# Show the document order; the reference-score vectors are positional and
# have to line up with it.
docnames(estonia)

# Reference scores for the Estonian models: one entry per document of
# `estonia`, in document order. NA means the document has no reference score.
# Only bl (without eu) and emp vectors are defined for Estonia.

# bl benchmark
scores_estonia_lr_bl <- c(19.28571510315, NA, 8.857142448425, NA,
                          17.1428565979, NA, 9.142857551575, NA)
scores_estonia_ec_bl <- c(11.833333, NA, 8, NA, 16.666666, NA, 7.3333335, NA)
scores_estonia_so_bl <- c(16, NA, 12, NA, 12.6, NA, 6.5999999, NA)

# emp benchmark
scores_estonia_lr_emp <- c(7.001, NA, 5, NA, NA, NA, 3, NA)
scores_estonia_eu_emp <- c(6, NA, 7, NA, 8, NA, 9, NA)
scores_estonia_ec_emp <- c(7, NA, 5, NA, 8, NA, 3, NA)
scores_estonia_so_emp <- c(5, NA, 3.001, NA, 3, NA, 5, NA)

# Fit one Wordscores model per reference-score vector on the cleaned Estonian
# dfm. No bl/eu and no ches models exist because no such score vectors are
# defined for Estonia.
ws_estonia_lr_bl <- textmodel_wordscores(x = estonia, y = scores_estonia_lr_bl)
ws_estonia_ec_bl <- textmodel_wordscores(x = estonia, y = scores_estonia_ec_bl)
ws_estonia_so_bl <- textmodel_wordscores(x = estonia, y = scores_estonia_so_bl)

ws_estonia_lr_emp <- textmodel_wordscores(x = estonia, y = scores_estonia_lr_emp)
ws_estonia_eu_emp <- textmodel_wordscores(x = estonia, y = scores_estonia_eu_emp)
ws_estonia_ec_emp <- textmodel_wordscores(x = estonia, y = scores_estonia_ec_emp)
ws_estonia_so_emp <- textmodel_wordscores(x = estonia, y = scores_estonia_so_emp)

# Predicted document scores: one single-column data frame per
# dimension (lr, eu, ec, so) x benchmark (bl, ches, emp) x rescaling (lbg, mv).
# The column is named "<benchmark>_<dimension>_<rescaling>" at construction.
# Combinations without a fitted model get an all-NA column whose length follows
# the number of documents, so every country file has the same 24 columns.

# --- LBG rescaling ---
estonia_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_estonia_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
estonia_eu_bl_lbg <- data.frame(bl_eu_lbg = rep(NA, ndoc(estonia)))
estonia_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_estonia_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
estonia_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_estonia_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

estonia_lr_ches_lbg <- data.frame(ches_lr_lbg = rep(NA, ndoc(estonia)))
estonia_eu_ches_lbg <- data.frame(ches_eu_lbg = rep(NA, ndoc(estonia)))
estonia_ec_ches_lbg <- data.frame(ches_ec_lbg = rep(NA, ndoc(estonia)))
estonia_so_ches_lbg <- data.frame(ches_so_lbg = rep(NA, ndoc(estonia)))

estonia_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_estonia_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
estonia_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_estonia_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
estonia_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_estonia_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
estonia_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_estonia_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# --- MV rescaling ---
estonia_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_estonia_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
estonia_eu_bl_mv <- data.frame(bl_eu_mv = rep(NA, ndoc(estonia)))
estonia_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_estonia_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
estonia_so_bl_mv <- setNames(
  as.data.frame(predict(ws_estonia_so_bl, rescaling = "mv")), "bl_so_mv"
)

estonia_lr_ches_mv <- data.frame(ches_lr_mv = rep(NA, ndoc(estonia)))
estonia_eu_ches_mv <- data.frame(ches_eu_mv = rep(NA, ndoc(estonia)))
estonia_ec_ches_mv <- data.frame(ches_ec_mv = rep(NA, ndoc(estonia)))
estonia_so_ches_mv <- data.frame(ches_so_mv = rep(NA, ndoc(estonia)))

estonia_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_estonia_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
estonia_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_estonia_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
estonia_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_estonia_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
estonia_so_emp_mv <- setNames(
  as.data.frame(predict(ws_estonia_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Assemble the 24 columns in the fixed order shared by all country files.
estonia_wordscores <- cbind(
  estonia_lr_bl_lbg, estonia_eu_bl_lbg, estonia_ec_bl_lbg, estonia_so_bl_lbg,
  estonia_lr_ches_lbg, estonia_eu_ches_lbg, estonia_ec_ches_lbg, estonia_so_ches_lbg,
  estonia_lr_emp_lbg, estonia_eu_emp_lbg, estonia_ec_emp_lbg, estonia_so_emp_lbg,
  estonia_lr_bl_mv, estonia_eu_bl_mv, estonia_ec_bl_mv, estonia_so_bl_mv,
  estonia_lr_ches_mv, estonia_eu_ches_mv, estonia_ec_ches_mv, estonia_so_ches_mv,
  estonia_lr_emp_mv, estonia_eu_emp_mv, estonia_ec_emp_mv, estonia_so_emp_mv
)
# Round-trip through a matrix so all columns share one type (the all-NA
# placeholder columns are logical before this step).
estonia_wordscores <- as.data.frame(as.matrix(estonia_wordscores))

# Write straight into the Wordscores folder instead of changing into it: a
# failed write then cannot leave the session in the wrong working directory.
write.csv(
  estonia_wordscores,
  file = "~/Downloads/Replication Files/Wordscores/estonia_wordscores.csv"
)
# Kept from the original: the sections that follow use paths relative to the
# Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Finland ################################################################################

# Extract the text of every Finnish manifesto with pdf_text() and store it as
# plain text next to the PDF.
KD_04 <- pdf_text(pdf = "Finland/KD_04.pdf")
write(x = KD_04, file = "Finland/KD_04.txt")
KD_09 <- pdf_text(pdf = "Finland/KD_09.pdf")
write(x = KD_09, file = "Finland/KD_09.txt")

KESK_04 <- pdf_text(pdf = "Finland/KESK_04.pdf")
write(x = KESK_04, file = "Finland/KESK_04.txt")
KESK_09 <- pdf_text(pdf = "Finland/KESK_09.pdf")
write(x = KESK_09, file = "Finland/KESK_09.txt")

KOK_04 <- pdf_text(pdf = "Finland/KOK_04.pdf")
write(x = KOK_04, file = "Finland/KOK_04.txt")
KOK_09 <- pdf_text(pdf = "Finland/KOK_09.pdf")
write(x = KOK_09, file = "Finland/KOK_09.txt")

RKPSFP_04 <- pdf_text(pdf = "Finland/RKPSFP_04.pdf")
write(x = RKPSFP_04, file = "Finland/RKPSFP_04.txt")
RKPSFP_09 <- pdf_text(pdf = "Finland/RKPSFP_09.pdf")
write(x = RKPSFP_09, file = "Finland/RKPSFP_09.txt")

SDP_04 <- pdf_text(pdf = "Finland/SDP_04.pdf")
write(x = SDP_04, file = "Finland/SDP_04.txt")
SDP_09 <- pdf_text(pdf = "Finland/SDP_09.pdf")
write(x = SDP_09, file = "Finland/SDP_09.txt")

VAS_04 <- pdf_text(pdf = "Finland/VAS_04.pdf")
write(x = VAS_04, file = "Finland/VAS_04.txt")
VAS_09 <- pdf_text(pdf = "Finland/VAS_09.pdf")
write(x = VAS_09, file = "Finland/VAS_09.txt")

VIHR_04 <- pdf_text(pdf = "Finland/VIHR_04.pdf")
write(x = VIHR_04, file = "Finland/VIHR_04.txt")
VIHR_09 <- pdf_text(pdf = "Finland/VIHR_09.pdf")
write(x = VIHR_09, file = "Finland/VIHR_09.txt")

# Read all text files of the folder back in and build the document-feature
# matrix. `finland_dfm` stays untouched; `finland` is the copy that gets cleaned.
finland_texts <- readtext(file = "Finland/*.txt")
finland_corpus <- corpus(finland_texts)
finland_dfm <- dfm(finland_corpus)
is.dfm(finland_dfm)

# Cleaning

finland <- dfm_tolower(finland_dfm)
# Drop every feature that matches one of these character classes, one class
# at a time and in this order.
finland <- Reduce(
  function(current_dfm, char_class) {
    dfm_select(current_dfm, pattern = char_class, selection = "remove",
               valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  finland
)
# Drop Finnish stopwords (exact matches against the stopwords-iso list).
finland <- dfm_select(
  finland,
  pattern = stopwords(language = "fi", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)


# Tokens and types per document. These are taken from `finland_dfm`, i.e. the
# matrix as it was before the cleaning steps above.
finland_dfm_words <- as.data.frame(ntoken(finland_dfm))
finland_dfm_uniquewords <- as.data.frame(ntype(finland_dfm))

# Run Wordscores

# Finland

# Show the document order; the reference-score vectors are positional and
# have to line up with it.
docnames(finland)
# Reference scores for the Finnish models: one entry per document of
# `finland`, in document order. NA means the document has no reference score
# on that dimension.

# bl benchmark
scores_finland_lr_bl <- c(14.30303001404, NA, 12, NA, 15.57575798035, NA,
                          13.75757598877, NA, 8.39393901825, NA,
                          4.454545497894, NA, 7.515151500702, NA)
scores_finland_eu_bl <- c(6.5757599, NA, 5.90909, NA, 13.393939, NA,
                          12.606061, NA, 14.151515, NA, 9.09091, NA,
                          12.060606, NA)
scores_finland_ec_bl <- c(8.515152, NA, 9.454545, NA, 15.787879, NA,
                          12.909091, NA, 8.393939, NA, 4.3333335, NA,
                          7.121212, NA)
scores_finland_so_bl <- c(18.575758, NA, 14.606061, NA, 12.30303, NA,
                          6.59375, NA, 7.6060605, NA, 5.5454545, NA,
                          3.030303, NA)

# ches benchmark
scores_finland_lr_ches <- c(6.889999866486, NA, 6.110000133514, NA, 7, NA,
                            6.55999994278, NA, 3.670000076294, NA,
                            2.44000005722, NA, 3.670000076294, NA)
scores_finland_eu_ches <- c(3.890000104904, NA, 4.780000209808, NA,
                            6.670000076294, NA, 6.55999994278, NA,
                            6.44000005722, NA, 4.44000005722, NA,
                            5.670000076294, NA)
scores_finland_ec_ches <- c(5.2199998, NA, 4.8899999, NA, 7.2199998, NA,
                            6.5599999, NA, 3.8900001, NA, 2, NA,
                            3.3299999, NA)
scores_finland_so_ches <- c(8, NA, 7.2199998, NA, 5.7800002, NA, 4.25, NA,
                            3.8900001, NA, 3.6700001, NA, 2.22, NA)

# emp benchmark
scores_finland_lr_emp <- c(7, NA, 6, NA, 8, NA, 4, NA, 6, NA, 3, NA, 6, NA)
scores_finland_eu_emp <- c(4, NA, 9.01, NA, 8, NA, 9, NA, 8.99, NA, 8, NA, 8, NA)
scores_finland_ec_emp <- c(5, NA, 5, NA, 8, NA, NA, NA, 5, NA, 4, NA, 5, NA)
scores_finland_so_emp <- c(5, NA, 1, NA, 1.001, NA, NA, NA, 1.003, NA, 2, NA, 1.002, NA)

# Fit one Wordscores model per reference-score vector on the cleaned Finnish dfm.
ws_finland_lr_bl <- textmodel_wordscores(x = finland, y = scores_finland_lr_bl)
ws_finland_eu_bl <- textmodel_wordscores(x = finland, y = scores_finland_eu_bl)
ws_finland_ec_bl <- textmodel_wordscores(x = finland, y = scores_finland_ec_bl)
ws_finland_so_bl <- textmodel_wordscores(x = finland, y = scores_finland_so_bl)

ws_finland_lr_ches <- textmodel_wordscores(x = finland, y = scores_finland_lr_ches)
ws_finland_eu_ches <- textmodel_wordscores(x = finland, y = scores_finland_eu_ches)
ws_finland_ec_ches <- textmodel_wordscores(x = finland, y = scores_finland_ec_ches)
ws_finland_so_ches <- textmodel_wordscores(x = finland, y = scores_finland_so_ches)

ws_finland_lr_emp <- textmodel_wordscores(x = finland, y = scores_finland_lr_emp)
ws_finland_eu_emp <- textmodel_wordscores(x = finland, y = scores_finland_eu_emp)
ws_finland_ec_emp <- textmodel_wordscores(x = finland, y = scores_finland_ec_emp)
ws_finland_so_emp <- textmodel_wordscores(x = finland, y = scores_finland_so_emp)

# Predicted document scores: one single-column data frame per
# dimension (lr, eu, ec, so) x benchmark (bl, ches, emp) x rescaling (lbg, mv).
# The column is named "<benchmark>_<dimension>_<rescaling>" at construction.

# --- LBG rescaling ---
finland_lr_bl_lbg <- setNames(
  as.data.frame(predict(ws_finland_lr_bl, rescaling = "lbg")), "bl_lr_lbg"
)
finland_eu_bl_lbg <- setNames(
  as.data.frame(predict(ws_finland_eu_bl, rescaling = "lbg")), "bl_eu_lbg"
)
finland_ec_bl_lbg <- setNames(
  as.data.frame(predict(ws_finland_ec_bl, rescaling = "lbg")), "bl_ec_lbg"
)
finland_so_bl_lbg <- setNames(
  as.data.frame(predict(ws_finland_so_bl, rescaling = "lbg")), "bl_so_lbg"
)

finland_lr_ches_lbg <- setNames(
  as.data.frame(predict(ws_finland_lr_ches, rescaling = "lbg")), "ches_lr_lbg"
)
finland_eu_ches_lbg <- setNames(
  as.data.frame(predict(ws_finland_eu_ches, rescaling = "lbg")), "ches_eu_lbg"
)
finland_ec_ches_lbg <- setNames(
  as.data.frame(predict(ws_finland_ec_ches, rescaling = "lbg")), "ches_ec_lbg"
)
finland_so_ches_lbg <- setNames(
  as.data.frame(predict(ws_finland_so_ches, rescaling = "lbg")), "ches_so_lbg"
)

finland_lr_emp_lbg <- setNames(
  as.data.frame(predict(ws_finland_lr_emp, rescaling = "lbg")), "emp_lr_lbg"
)
finland_eu_emp_lbg <- setNames(
  as.data.frame(predict(ws_finland_eu_emp, rescaling = "lbg")), "emp_eu_lbg"
)
finland_ec_emp_lbg <- setNames(
  as.data.frame(predict(ws_finland_ec_emp, rescaling = "lbg")), "emp_ec_lbg"
)
finland_so_emp_lbg <- setNames(
  as.data.frame(predict(ws_finland_so_emp, rescaling = "lbg")), "emp_so_lbg"
)

# --- MV rescaling ---
finland_lr_bl_mv <- setNames(
  as.data.frame(predict(ws_finland_lr_bl, rescaling = "mv")), "bl_lr_mv"
)
finland_eu_bl_mv <- setNames(
  as.data.frame(predict(ws_finland_eu_bl, rescaling = "mv")), "bl_eu_mv"
)
finland_ec_bl_mv <- setNames(
  as.data.frame(predict(ws_finland_ec_bl, rescaling = "mv")), "bl_ec_mv"
)
finland_so_bl_mv <- setNames(
  as.data.frame(predict(ws_finland_so_bl, rescaling = "mv")), "bl_so_mv"
)

finland_lr_ches_mv <- setNames(
  as.data.frame(predict(ws_finland_lr_ches, rescaling = "mv")), "ches_lr_mv"
)
finland_eu_ches_mv <- setNames(
  as.data.frame(predict(ws_finland_eu_ches, rescaling = "mv")), "ches_eu_mv"
)
finland_ec_ches_mv <- setNames(
  as.data.frame(predict(ws_finland_ec_ches, rescaling = "mv")), "ches_ec_mv"
)
finland_so_ches_mv <- setNames(
  as.data.frame(predict(ws_finland_so_ches, rescaling = "mv")), "ches_so_mv"
)

finland_lr_emp_mv <- setNames(
  as.data.frame(predict(ws_finland_lr_emp, rescaling = "mv")), "emp_lr_mv"
)
finland_eu_emp_mv <- setNames(
  as.data.frame(predict(ws_finland_eu_emp, rescaling = "mv")), "emp_eu_mv"
)
finland_ec_emp_mv <- setNames(
  as.data.frame(predict(ws_finland_ec_emp, rescaling = "mv")), "emp_ec_mv"
)
finland_so_emp_mv <- setNames(
  as.data.frame(predict(ws_finland_so_emp, rescaling = "mv")), "emp_so_mv"
)

# Assemble the 24 columns in the fixed order shared by all country files.
finland_wordscores <- cbind(
  finland_lr_bl_lbg, finland_eu_bl_lbg, finland_ec_bl_lbg, finland_so_bl_lbg,
  finland_lr_ches_lbg, finland_eu_ches_lbg, finland_ec_ches_lbg, finland_so_ches_lbg,
  finland_lr_emp_lbg, finland_eu_emp_lbg, finland_ec_emp_lbg, finland_so_emp_lbg,
  finland_lr_bl_mv, finland_eu_bl_mv, finland_ec_bl_mv, finland_so_bl_mv,
  finland_lr_ches_mv, finland_eu_ches_mv, finland_ec_ches_mv, finland_so_ches_mv,
  finland_lr_emp_mv, finland_eu_emp_mv, finland_ec_emp_mv, finland_so_emp_mv
)
# Round-trip through a matrix, as in the other country sections, so the
# exported object is a plain data frame built from one matrix.
finland_wordscores <- as.data.frame(as.matrix(finland_wordscores))

# Write straight into the Wordscores folder instead of changing into it: a
# failed write then cannot leave the session in the wrong working directory.
write.csv(
  finland_wordscores,
  file = "~/Downloads/Replication Files/Wordscores/finland_wordscores.csv"
)
# Kept from the original: the sections that follow use paths relative to the
# Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")



################################################################################## France ################################################################################

# France: turn every manifesto PDF into a plain-text file next to it.
#
# Manifestos handled with OCR: pdf_convert() renders the pages to image files
# and ocr() reads them with the French ("fra") Tesseract language data.
FN_04 <- ocr(pdf_convert("France/FN_04.pdf"), engine = "fra")
write(FN_04, file = "France/FN_04.txt")
PCF_04 <- ocr(pdf_convert("France/PCF_04.pdf"), engine = "fra")
write(PCF_04, file = "France/PCF_04.txt")
PS_04 <- ocr(pdf_convert("France/PS_04.pdf"), engine = "fra")
write(PS_04, file = "France/PS_04.txt")
UDF_04 <- ocr(pdf_convert("France/UDF_04.pdf"), engine = "fra")
write(UDF_04, file = "France/UDF_04.txt")
UMP_04 <- ocr(pdf_convert("France/UMP_04.pdf"), engine = "fra")
write(UMP_04, file = "France/UMP_04.txt")
UMP_09 <- ocr(pdf_convert("France/UMP_09.pdf"), engine = "fra")
write(UMP_09, file = "France/UMP_09.txt")

# Manifestos whose text is read straight from the PDF with pdf_text().
FN_09 <- pdf_text("France/FN_09.pdf")
write(FN_09, file = "France/FN_09.txt")
PCF_09 <- pdf_text("France/PCF_09.pdf")
write(PCF_09, file = "France/PCF_09.txt")
PS_09 <- pdf_text("France/PS_09.pdf")
write(PS_09, file = "France/PS_09.txt")
UDF_09 <- pdf_text("France/UDF_09.pdf")
write(UDF_09, file = "France/UDF_09.txt")

# Read every .txt file in the France folder and build the raw
# document-feature matrix from the resulting corpus.
france_texts <- readtext("France/*.txt")
france_corpus <- corpus(france_texts)
france_dfm <- dfm(france_corpus)
is.dfm(france_dfm)

# Cleaning
#
# `france` is the cleaned working copy; `france_dfm` is left as built above.
# Features are lower-cased, then one dfm_select() pass per character class
# (in the order listed) removes every feature matching that class, and a last
# pass removes the French stopwords from the "stopwords-iso" source.
france <- Reduce(
  function(current, char_class) {
    dfm_select(current, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(france_dfm)
)
france <- dfm_select(
  france,
  stopwords(language = "fr", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Per-document token and type counts, computed on the uncleaned `france_dfm`.
france_dfm_words <- as.data.frame(ntoken(france_dfm))
france_dfm_uniquewords <- as.data.frame(ntype(france_dfm))

# Run Wordscores

# France

# The 2004 score for the PCF on the SO EMP is the score for the PRG

# Show the document names so the positional reference scores below can be
# checked against the document order. docnames() is quanteda's accessor for
# the names that were previously read straight from the dfm's Dimnames slot.
docnames(france)

# Reference scores: one entry per document of `france`, in document order.
# NA entries are documents without a reference score on that dimension.
# Vector names follow scores_<country>_<dimension>_<benchmark>; there is no
# eu_bl vector for France.
#
# NOTE(review): in lr_bl the third document scores 15.3 and the seventh
# 3.4693899, whereas lr_ches / lr_emp place the third document at the low end
# (1.62 / 2) and the seventh mid-scale (5.85 / 6). Possibly the values are in
# a different order than the documents -- confirm against benchmarks.csv.
scores_france_lr_bl <- c(19.1, NA, 15.3, NA, 5.3200002, NA, 3.4693899, NA, 8.5681801, NA)
scores_france_ec_bl <- c(16.693878, NA, 2.4000001, NA, 7.0999999, NA, 14.020409, NA, 14.347826, NA)
scores_france_so_bl <- c(18.857143, NA, 7.9166665, NA, 5.1041665, NA, 12, NA, 14.444445, NA)

scores_france_lr_ches <- c(9.920000076294, NA, 1.620000004768, NA, 3.849999904633, NA, 5.849999904633, NA, 7, NA)
scores_france_eu_ches <- c(1, NA, 2.769999980927, NA, 5.769999980927, NA, 6.380000114441, NA, 5, NA)
scores_france_ec_ches <- c(6, NA, 1.5, NA, 4.0799999, NA, 5.92, NA, 5.92, NA)
scores_france_so_ches <- c(9.8000002, NA, 4.5, NA, 3.1700001, NA, 5.5799999, NA, 7.25, NA)

scores_france_lr_emp <- c(10, NA, 2, NA, 4, NA, 6, NA, 6, NA)
scores_france_eu_emp <- c(1, NA, 8, NA, 9, NA, 9.0001, NA, 8.99999, NA)
scores_france_ec_emp <- c(3, NA, 2, NA, 4, NA, 7, NA, NA, NA)
scores_france_so_emp <- c(10, NA, 2, NA, NA, NA, NA, NA, NA, NA)

# Fit one Wordscores model per score vector on the cleaned dfm.
ws_france_lr_bl <- textmodel_wordscores(france, scores_france_lr_bl)
ws_france_ec_bl <- textmodel_wordscores(france, scores_france_ec_bl)
ws_france_so_bl <- textmodel_wordscores(france, scores_france_so_bl)

ws_france_lr_ches <- textmodel_wordscores(france, scores_france_lr_ches)
ws_france_eu_ches <- textmodel_wordscores(france, scores_france_eu_ches)
ws_france_ec_ches <- textmodel_wordscores(france, scores_france_ec_ches)
ws_france_so_ches <- textmodel_wordscores(france, scores_france_so_ches)

ws_france_lr_emp <- textmodel_wordscores(france, scores_france_lr_emp)
ws_france_eu_emp <- textmodel_wordscores(france, scores_france_eu_emp)
ws_france_ec_emp <- textmodel_wordscores(france, scores_france_ec_emp)
ws_france_so_emp <- textmodel_wordscores(france, scores_france_so_emp)


# Predicted document scores for every fitted France model, once per rescaling
# ("lbg" and "mv"). Each result is a one-column data frame whose column is
# named <benchmark>_<dimension>_<rescaling> at creation time.
#
# No eu_bl model is fitted for France, so the two bl_eu columns are NA
# placeholders with one row per document; they keep the column layout of the
# combined table complete.
france_lr_bl_lbg <- setNames(as.data.frame(predict(ws_france_lr_bl, rescaling = "lbg")), "bl_lr_lbg")
france_eu_bl_lbg <- data.frame(bl_eu_lbg = rep(NA, ndoc(france)))
france_ec_bl_lbg <- setNames(as.data.frame(predict(ws_france_ec_bl, rescaling = "lbg")), "bl_ec_lbg")
france_so_bl_lbg <- setNames(as.data.frame(predict(ws_france_so_bl, rescaling = "lbg")), "bl_so_lbg")

france_lr_ches_lbg <- setNames(as.data.frame(predict(ws_france_lr_ches, rescaling = "lbg")), "ches_lr_lbg")
france_eu_ches_lbg <- setNames(as.data.frame(predict(ws_france_eu_ches, rescaling = "lbg")), "ches_eu_lbg")
france_ec_ches_lbg <- setNames(as.data.frame(predict(ws_france_ec_ches, rescaling = "lbg")), "ches_ec_lbg")
france_so_ches_lbg <- setNames(as.data.frame(predict(ws_france_so_ches, rescaling = "lbg")), "ches_so_lbg")

france_lr_emp_lbg <- setNames(as.data.frame(predict(ws_france_lr_emp, rescaling = "lbg")), "emp_lr_lbg")
france_eu_emp_lbg <- setNames(as.data.frame(predict(ws_france_eu_emp, rescaling = "lbg")), "emp_eu_lbg")
france_ec_emp_lbg <- setNames(as.data.frame(predict(ws_france_ec_emp, rescaling = "lbg")), "emp_ec_lbg")
france_so_emp_lbg <- setNames(as.data.frame(predict(ws_france_so_emp, rescaling = "lbg")), "emp_so_lbg")

france_lr_bl_mv <- setNames(as.data.frame(predict(ws_france_lr_bl, rescaling = "mv")), "bl_lr_mv")
france_eu_bl_mv <- data.frame(bl_eu_mv = rep(NA, ndoc(france)))
france_ec_bl_mv <- setNames(as.data.frame(predict(ws_france_ec_bl, rescaling = "mv")), "bl_ec_mv")
france_so_bl_mv <- setNames(as.data.frame(predict(ws_france_so_bl, rescaling = "mv")), "bl_so_mv")

france_lr_ches_mv <- setNames(as.data.frame(predict(ws_france_lr_ches, rescaling = "mv")), "ches_lr_mv")
france_eu_ches_mv <- setNames(as.data.frame(predict(ws_france_eu_ches, rescaling = "mv")), "ches_eu_mv")
france_ec_ches_mv <- setNames(as.data.frame(predict(ws_france_ec_ches, rescaling = "mv")), "ches_ec_mv")
france_so_ches_mv <- setNames(as.data.frame(predict(ws_france_so_ches, rescaling = "mv")), "ches_so_mv")

france_lr_emp_mv <- setNames(as.data.frame(predict(ws_france_lr_emp, rescaling = "mv")), "emp_lr_mv")
france_eu_emp_mv <- setNames(as.data.frame(predict(ws_france_eu_emp, rescaling = "mv")), "emp_eu_mv")
france_ec_emp_mv <- setNames(as.data.frame(predict(ws_france_ec_emp, rescaling = "mv")), "emp_ec_mv")
france_so_emp_mv <- setNames(as.data.frame(predict(ws_france_so_emp, rescaling = "mv")), "emp_so_mv")

# Combine the 24 columns (12 "lbg" followed by 12 "mv").
france_wordscores <- cbind(
  france_lr_bl_lbg, france_eu_bl_lbg, france_ec_bl_lbg, france_so_bl_lbg,
  france_lr_ches_lbg, france_eu_ches_lbg, france_ec_ches_lbg, france_so_ches_lbg,
  france_lr_emp_lbg, france_eu_emp_lbg, france_ec_emp_lbg, france_so_emp_lbg,
  france_lr_bl_mv, france_eu_bl_mv, france_ec_bl_mv, france_so_bl_mv,
  france_lr_ches_mv, france_eu_ches_mv, france_ec_ches_mv, france_so_ches_mv,
  france_lr_emp_mv, france_eu_emp_mv, france_ec_emp_mv, france_so_emp_mv
)
# The round trip through a matrix coerces all columns (including the NA
# placeholders) to one common type.
france_wordscores <- as.data.frame(as.matrix(france_wordscores))

# Write to the Wordscores folder by full path instead of changing the working
# directory there and back; the working directory stays on the Manifestos
# folder that the relative manifesto paths rely on.
write.csv(
  france_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "france_wordscores.csv")
)


################################################################################## Germany ################################################################################

# Germany: read the text of every manifesto PDF with pdf_text() and store it
# as a plain-text file next to the PDF.

# 2004 manifestos
B90GRUNEN_04 <- pdf_text("Germany/B90GRUNEN_04.pdf")
write(B90GRUNEN_04, file = "Germany/B90GRUNEN_04.txt")
CDU_04 <- pdf_text("Germany/CDU_04.pdf")
write(CDU_04, file = "Germany/CDU_04.txt")
CSU_04 <- pdf_text("Germany/CSU_04.pdf")
write(CSU_04, file = "Germany/CSU_04.txt")
FDP_04 <- pdf_text("Germany/FDP_04.pdf")
write(FDP_04, file = "Germany/FDP_04.txt")
LINKE_04 <- pdf_text("Germany/LINKE_04.pdf")
write(LINKE_04, file = "Germany/LINKE_04.txt")
SPD_04 <- pdf_text("Germany/SPD_04.pdf")
write(SPD_04, file = "Germany/SPD_04.txt")

# 2009 manifestos
B90GRUNEN_09 <- pdf_text("Germany/B90GRUNEN_09.pdf")
write(B90GRUNEN_09, file = "Germany/B90GRUNEN_09.txt")
CDU_09 <- pdf_text("Germany/CDU_09.pdf")
write(CDU_09, file = "Germany/CDU_09.txt")
CSU_09 <- pdf_text("Germany/CSU_09.pdf")
write(CSU_09, file = "Germany/CSU_09.txt")
FDP_09 <- pdf_text("Germany/FDP_09.pdf")
write(FDP_09, file = "Germany/FDP_09.txt")
LINKE_09 <- pdf_text("Germany/LINKE_09.pdf")
write(LINKE_09, file = "Germany/LINKE_09.txt")
SPD_09 <- pdf_text("Germany/SPD_09.pdf")
write(SPD_09, file = "Germany/SPD_09.txt")

# Read every .txt file in the Germany folder and build the raw
# document-feature matrix from the resulting corpus.
germany_texts <- readtext("Germany/*.txt")
germany_corpus <- corpus(germany_texts)
germany_dfm <- dfm(germany_corpus)
is.dfm(germany_dfm)

# Cleaning
#
# `germany` is the cleaned working copy; `germany_dfm` is left as built above.
# Features are lower-cased, then one dfm_select() pass per character class
# (in the order listed) removes every feature matching that class, and a last
# pass removes the German stopwords from the "stopwords-iso" source.
germany <- Reduce(
  function(current, char_class) {
    dfm_select(current, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(germany_dfm)
)
germany <- dfm_select(
  germany,
  stopwords(language = "de", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Per-document token and type counts, computed on the uncleaned `germany_dfm`.
germany_dfm_words <- as.data.frame(ntoken(germany_dfm))
germany_dfm_uniquewords <- as.data.frame(ntype(germany_dfm))

# Run Wordscores

# Germany

# Show the document names so the positional reference scores below can be
# checked against the document order. docnames() is quanteda's accessor for
# the names that were previously read straight from the dfm's Dimnames slot.
docnames(germany)

# Reference scores: one entry per document of `germany`, in document order.
# NA entries are documents without a reference score on that dimension.
# Vector names follow scores_<country>_<dimension>_<benchmark>.
scores_germany_lr_bl <- c(7.104166507721, NA, 13.58333301544, NA, 13.59, NA, 13.36458301544, NA, 3.553191423416, NA, 8.364583015442, NA)
scores_germany_eu_bl <- c(14.058139, NA, 10.23, NA, 10.22727, NA, 11.04878, NA, 10.31746, NA, 12.955056, NA)
scores_germany_ec_bl <- c(11.020618, NA, 14.4, NA, 14.4, NA, 18.712767, NA, 2.9891305, NA, 9.322917, NA)
scores_germany_so_bl <- c(2.3684211, NA, 15.92, NA, 15.914893, NA, 5.2903228, NA, 4.869565, NA, 7.2631578, NA)

scores_germany_lr_ches <- c(3.359999895096, NA, 5.929999828339, NA, 7.360000133514, NA, 6.070000171661, NA, 1.639999985695, NA, 4, NA)
scores_germany_eu_ches <- c(6.360000133514, NA, 6.639999866486, NA, 5.5, NA, 6.570000171661, NA, 4.070000171661, NA, 6.110000133514, NA)
scores_germany_ec_ches <- c(4.5, NA, 5.8600001, NA, 6.21, NA, 7.6399999, NA, 1.29, NA, 3.5699999, NA)
scores_germany_so_ches <- c(1.9299999, NA, 6.6399999, NA, 7.6399999, NA, 3.5699999, NA, 4.27, NA, 4.5700002, NA)

scores_germany_lr_emp <- c(5, NA, 8, NA, 10, NA, 8, NA, 1, NA, 4, NA)
scores_germany_eu_emp <- c(8, NA, 6, NA, 2, NA, 7, NA, 9, NA, 8, NA)
scores_germany_ec_emp <- c(6, NA, 9.01, NA, 9, NA, 8.89, NA, 4, NA, 8, NA)
scores_germany_so_emp <- c(5, NA, 8.01, NA, 8, NA, 3, NA, 5, NA, 2, NA)

# Fit one Wordscores model per score vector on the cleaned dfm.
ws_germany_lr_bl <- textmodel_wordscores(germany, scores_germany_lr_bl)
ws_germany_eu_bl <- textmodel_wordscores(germany, scores_germany_eu_bl)
ws_germany_ec_bl <- textmodel_wordscores(germany, scores_germany_ec_bl)
ws_germany_so_bl <- textmodel_wordscores(germany, scores_germany_so_bl)

ws_germany_lr_ches <- textmodel_wordscores(germany, scores_germany_lr_ches)
ws_germany_eu_ches <- textmodel_wordscores(germany, scores_germany_eu_ches)
ws_germany_ec_ches <- textmodel_wordscores(germany, scores_germany_ec_ches)
ws_germany_so_ches <- textmodel_wordscores(germany, scores_germany_so_ches)

ws_germany_lr_emp <- textmodel_wordscores(germany, scores_germany_lr_emp)
ws_germany_eu_emp <- textmodel_wordscores(germany, scores_germany_eu_emp)
ws_germany_ec_emp <- textmodel_wordscores(germany, scores_germany_ec_emp)
ws_germany_so_emp <- textmodel_wordscores(germany, scores_germany_so_emp)


# Predicted document scores for every fitted Germany model, once per rescaling
# ("lbg" and "mv"). Each result is a one-column data frame whose column is
# named <benchmark>_<dimension>_<rescaling> at creation time.
germany_lr_bl_lbg <- setNames(as.data.frame(predict(ws_germany_lr_bl, rescaling = "lbg")), "bl_lr_lbg")
germany_eu_bl_lbg <- setNames(as.data.frame(predict(ws_germany_eu_bl, rescaling = "lbg")), "bl_eu_lbg")
germany_ec_bl_lbg <- setNames(as.data.frame(predict(ws_germany_ec_bl, rescaling = "lbg")), "bl_ec_lbg")
germany_so_bl_lbg <- setNames(as.data.frame(predict(ws_germany_so_bl, rescaling = "lbg")), "bl_so_lbg")

germany_lr_ches_lbg <- setNames(as.data.frame(predict(ws_germany_lr_ches, rescaling = "lbg")), "ches_lr_lbg")
germany_eu_ches_lbg <- setNames(as.data.frame(predict(ws_germany_eu_ches, rescaling = "lbg")), "ches_eu_lbg")
germany_ec_ches_lbg <- setNames(as.data.frame(predict(ws_germany_ec_ches, rescaling = "lbg")), "ches_ec_lbg")
germany_so_ches_lbg <- setNames(as.data.frame(predict(ws_germany_so_ches, rescaling = "lbg")), "ches_so_lbg")

germany_lr_emp_lbg <- setNames(as.data.frame(predict(ws_germany_lr_emp, rescaling = "lbg")), "emp_lr_lbg")
germany_eu_emp_lbg <- setNames(as.data.frame(predict(ws_germany_eu_emp, rescaling = "lbg")), "emp_eu_lbg")
germany_ec_emp_lbg <- setNames(as.data.frame(predict(ws_germany_ec_emp, rescaling = "lbg")), "emp_ec_lbg")
germany_so_emp_lbg <- setNames(as.data.frame(predict(ws_germany_so_emp, rescaling = "lbg")), "emp_so_lbg")

germany_lr_bl_mv <- setNames(as.data.frame(predict(ws_germany_lr_bl, rescaling = "mv")), "bl_lr_mv")
germany_eu_bl_mv <- setNames(as.data.frame(predict(ws_germany_eu_bl, rescaling = "mv")), "bl_eu_mv")
germany_ec_bl_mv <- setNames(as.data.frame(predict(ws_germany_ec_bl, rescaling = "mv")), "bl_ec_mv")
germany_so_bl_mv <- setNames(as.data.frame(predict(ws_germany_so_bl, rescaling = "mv")), "bl_so_mv")

germany_lr_ches_mv <- setNames(as.data.frame(predict(ws_germany_lr_ches, rescaling = "mv")), "ches_lr_mv")
germany_eu_ches_mv <- setNames(as.data.frame(predict(ws_germany_eu_ches, rescaling = "mv")), "ches_eu_mv")
germany_ec_ches_mv <- setNames(as.data.frame(predict(ws_germany_ec_ches, rescaling = "mv")), "ches_ec_mv")
germany_so_ches_mv <- setNames(as.data.frame(predict(ws_germany_so_ches, rescaling = "mv")), "ches_so_mv")

germany_lr_emp_mv <- setNames(as.data.frame(predict(ws_germany_lr_emp, rescaling = "mv")), "emp_lr_mv")
germany_eu_emp_mv <- setNames(as.data.frame(predict(ws_germany_eu_emp, rescaling = "mv")), "emp_eu_mv")
germany_ec_emp_mv <- setNames(as.data.frame(predict(ws_germany_ec_emp, rescaling = "mv")), "emp_ec_mv")
germany_so_emp_mv <- setNames(as.data.frame(predict(ws_germany_so_emp, rescaling = "mv")), "emp_so_mv")

# Combine the 24 columns (12 "lbg" followed by 12 "mv").
germany_wordscores <- cbind(
  germany_lr_bl_lbg, germany_eu_bl_lbg, germany_ec_bl_lbg, germany_so_bl_lbg,
  germany_lr_ches_lbg, germany_eu_ches_lbg, germany_ec_ches_lbg, germany_so_ches_lbg,
  germany_lr_emp_lbg, germany_eu_emp_lbg, germany_ec_emp_lbg, germany_so_emp_lbg,
  germany_lr_bl_mv, germany_eu_bl_mv, germany_ec_bl_mv, germany_so_bl_mv,
  germany_lr_ches_mv, germany_eu_ches_mv, germany_ec_ches_mv, germany_so_ches_mv,
  germany_lr_emp_mv, germany_eu_emp_mv, germany_ec_emp_mv, germany_so_emp_mv
)
# The round trip through a matrix coerces all columns to one common type.
germany_wordscores <- as.data.frame(as.matrix(germany_wordscores))

# Write to the Wordscores folder by full path instead of changing the working
# directory there and back; the working directory stays on the Manifestos
# folder that the relative manifesto paths rely on.
write.csv(
  germany_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "germany_wordscores.csv")
)


################################################################################## Great Britain ################################################################################

# Great Britain: read the text of every manifesto PDF with pdf_text() and
# store it as a plain-text file next to the PDF.

# 2004 manifestos
CON_04 <- pdf_text("Great Britain/CON_04.pdf")
write(CON_04, file = "Great Britain/CON_04.txt")
LABOUR_04 <- pdf_text("Great Britain/LABOUR_04.pdf")
write(LABOUR_04, file = "Great Britain/LABOUR_04.txt")
LIBDEM_04 <- pdf_text("Great Britain/LIBDEM_04.pdf")
write(LIBDEM_04, file = "Great Britain/LIBDEM_04.txt")
PC_04 <- pdf_text("Great Britain/PC_04.pdf")
write(PC_04, file = "Great Britain/PC_04.txt")
SNP_04 <- pdf_text("Great Britain/SNP_04.pdf")
write(SNP_04, file = "Great Britain/SNP_04.txt")

# 2009 manifestos
CON_09 <- pdf_text("Great Britain/CON_09.pdf")
write(CON_09, file = "Great Britain/CON_09.txt")
LABOUR_09 <- pdf_text("Great Britain/LABOUR_09.pdf")
write(LABOUR_09, file = "Great Britain/LABOUR_09.txt")
LIBDEM_09 <- pdf_text("Great Britain/LIBDEM_09.pdf")
write(LIBDEM_09, file = "Great Britain/LIBDEM_09.txt")
PC_09 <- pdf_text("Great Britain/PC_09.pdf")
write(PC_09, file = "Great Britain/PC_09.txt")
SNP_09 <- pdf_text("Great Britain/SNP_09.pdf")
write(SNP_09, file = "Great Britain/SNP_09.txt")

# Read every .txt file in the Great Britain folder and build the raw
# document-feature matrix from the resulting corpus.
greatbritain_texts <- readtext("Great Britain/*.txt")
greatbritain_corpus <- corpus(greatbritain_texts)
greatbritain_dfm <- dfm(greatbritain_corpus)
is.dfm(greatbritain_dfm)

# Cleaning
#
# `greatbritain` is the cleaned working copy; `greatbritain_dfm` is left as
# built above. Features are lower-cased, then one dfm_select() pass per
# character class (in the order listed) removes every feature matching that
# class, and a last pass removes the English stopwords from the
# "stopwords-iso" source.
greatbritain <- Reduce(
  function(current, char_class) {
    dfm_select(current, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(greatbritain_dfm)
)
greatbritain <- dfm_select(
  greatbritain,
  stopwords(language = "en", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Per-document token and type counts, computed on the uncleaned
# `greatbritain_dfm`.
greatbritain_dfm_words <- as.data.frame(ntoken(greatbritain_dfm))
greatbritain_dfm_uniquewords <- as.data.frame(ntype(greatbritain_dfm))

# Run Wordscores

# Great Britain

# Show the document names so the positional reference scores below can be
# checked against the document order. docnames() is quanteda's accessor for
# the names that were previously read straight from the dfm's Dimnames slot.
docnames(greatbritain)

# Reference scores: one entry per document of `greatbritain`, in document
# order. NA entries are documents without a reference score on that dimension.
# Vector names follow scores_<country>_<dimension>_<benchmark>.
scores_greatbritain_lr_bl <- c(16.38596534729, NA, 10.94736862183, NA, 7.91228055954, NA, 6.018867969513, NA, 7.134615421295, NA)
scores_greatbritain_eu_bl <- c(3.4035101, NA, 10.98246, NA, 15.854546, NA, 14.756757, NA, 14.575, NA)
scores_greatbritain_ec_bl <- c(15.321428, NA, 8.087719, NA, 5.7894735, NA, 5.2222223, NA, 6.0652175, NA)
scores_greatbritain_so_bl <- c(15.263158, NA, 6.9122806, NA, 4.1428571, NA, 7.7027025, NA, 8.0540543, NA)

scores_greatbritain_lr_ches <- c(7.719999790192, NA, 5.179999828339, NA, 3.819999933243, NA, 3.200000047684, NA, 3, NA)
scores_greatbritain_eu_ches <- c(2, NA, 5.219999790192, NA, 6.670000076294, NA, 5.75, NA, 5.539999961853, NA)
scores_greatbritain_ec_ches <- c(8.0600004, NA, 5.2800002, NA, 4.2199998, NA, 3.0699999, NA, 2.8299999, NA)
scores_greatbritain_so_ches <- c(8.1700001, NA, 4.8299999, NA, 2.5, NA, 4.1900001, NA, 4.5, NA)

scores_greatbritain_lr_emp <- c(7, NA, 4, NA, 4, NA, 2, NA, 2.001, NA)
scores_greatbritain_eu_emp <- c(3, NA, 7, NA, 7, NA, 8, NA, 7, NA)
scores_greatbritain_ec_emp <- c(8, NA, 4, NA, 5, NA, 2, NA, 2.001, NA)
scores_greatbritain_so_emp <- c(4, NA, 3, NA, 2, NA, 3, NA, 1, NA)

# Fit one Wordscores model per score vector on the cleaned dfm.
ws_greatbritain_lr_bl <- textmodel_wordscores(greatbritain, scores_greatbritain_lr_bl)
ws_greatbritain_eu_bl <- textmodel_wordscores(greatbritain, scores_greatbritain_eu_bl)
ws_greatbritain_ec_bl <- textmodel_wordscores(greatbritain, scores_greatbritain_ec_bl)
ws_greatbritain_so_bl <- textmodel_wordscores(greatbritain, scores_greatbritain_so_bl)

ws_greatbritain_lr_ches <- textmodel_wordscores(greatbritain, scores_greatbritain_lr_ches)
ws_greatbritain_eu_ches <- textmodel_wordscores(greatbritain, scores_greatbritain_eu_ches)
ws_greatbritain_ec_ches <- textmodel_wordscores(greatbritain, scores_greatbritain_ec_ches)
ws_greatbritain_so_ches <- textmodel_wordscores(greatbritain, scores_greatbritain_so_ches)

ws_greatbritain_lr_emp <- textmodel_wordscores(greatbritain, scores_greatbritain_lr_emp)
ws_greatbritain_eu_emp <- textmodel_wordscores(greatbritain, scores_greatbritain_eu_emp)
ws_greatbritain_ec_emp <- textmodel_wordscores(greatbritain, scores_greatbritain_ec_emp)
ws_greatbritain_so_emp <- textmodel_wordscores(greatbritain, scores_greatbritain_so_emp)


# Predicted document scores for every fitted Great Britain model, once per
# rescaling ("lbg" and "mv"). Each result is a one-column data frame whose
# column is named <benchmark>_<dimension>_<rescaling> at creation time.
greatbritain_lr_bl_lbg <- setNames(as.data.frame(predict(ws_greatbritain_lr_bl, rescaling = "lbg")), "bl_lr_lbg")
greatbritain_eu_bl_lbg <- setNames(as.data.frame(predict(ws_greatbritain_eu_bl, rescaling = "lbg")), "bl_eu_lbg")
greatbritain_ec_bl_lbg <- setNames(as.data.frame(predict(ws_greatbritain_ec_bl, rescaling = "lbg")), "bl_ec_lbg")
greatbritain_so_bl_lbg <- setNames(as.data.frame(predict(ws_greatbritain_so_bl, rescaling = "lbg")), "bl_so_lbg")

greatbritain_lr_ches_lbg <- setNames(as.data.frame(predict(ws_greatbritain_lr_ches, rescaling = "lbg")), "ches_lr_lbg")
greatbritain_eu_ches_lbg <- setNames(as.data.frame(predict(ws_greatbritain_eu_ches, rescaling = "lbg")), "ches_eu_lbg")
greatbritain_ec_ches_lbg <- setNames(as.data.frame(predict(ws_greatbritain_ec_ches, rescaling = "lbg")), "ches_ec_lbg")
greatbritain_so_ches_lbg <- setNames(as.data.frame(predict(ws_greatbritain_so_ches, rescaling = "lbg")), "ches_so_lbg")

greatbritain_lr_emp_lbg <- setNames(as.data.frame(predict(ws_greatbritain_lr_emp, rescaling = "lbg")), "emp_lr_lbg")
greatbritain_eu_emp_lbg <- setNames(as.data.frame(predict(ws_greatbritain_eu_emp, rescaling = "lbg")), "emp_eu_lbg")
greatbritain_ec_emp_lbg <- setNames(as.data.frame(predict(ws_greatbritain_ec_emp, rescaling = "lbg")), "emp_ec_lbg")
greatbritain_so_emp_lbg <- setNames(as.data.frame(predict(ws_greatbritain_so_emp, rescaling = "lbg")), "emp_so_lbg")

greatbritain_lr_bl_mv <- setNames(as.data.frame(predict(ws_greatbritain_lr_bl, rescaling = "mv")), "bl_lr_mv")
greatbritain_eu_bl_mv <- setNames(as.data.frame(predict(ws_greatbritain_eu_bl, rescaling = "mv")), "bl_eu_mv")
greatbritain_ec_bl_mv <- setNames(as.data.frame(predict(ws_greatbritain_ec_bl, rescaling = "mv")), "bl_ec_mv")
greatbritain_so_bl_mv <- setNames(as.data.frame(predict(ws_greatbritain_so_bl, rescaling = "mv")), "bl_so_mv")

greatbritain_lr_ches_mv <- setNames(as.data.frame(predict(ws_greatbritain_lr_ches, rescaling = "mv")), "ches_lr_mv")
greatbritain_eu_ches_mv <- setNames(as.data.frame(predict(ws_greatbritain_eu_ches, rescaling = "mv")), "ches_eu_mv")
greatbritain_ec_ches_mv <- setNames(as.data.frame(predict(ws_greatbritain_ec_ches, rescaling = "mv")), "ches_ec_mv")
greatbritain_so_ches_mv <- setNames(as.data.frame(predict(ws_greatbritain_so_ches, rescaling = "mv")), "ches_so_mv")

greatbritain_lr_emp_mv <- setNames(as.data.frame(predict(ws_greatbritain_lr_emp, rescaling = "mv")), "emp_lr_mv")
greatbritain_eu_emp_mv <- setNames(as.data.frame(predict(ws_greatbritain_eu_emp, rescaling = "mv")), "emp_eu_mv")
greatbritain_ec_emp_mv <- setNames(as.data.frame(predict(ws_greatbritain_ec_emp, rescaling = "mv")), "emp_ec_mv")
greatbritain_so_emp_mv <- setNames(as.data.frame(predict(ws_greatbritain_so_emp, rescaling = "mv")), "emp_so_mv")

# Combine the 24 columns (12 "lbg" followed by 12 "mv").
greatbritain_wordscores <- cbind(
  greatbritain_lr_bl_lbg, greatbritain_eu_bl_lbg, greatbritain_ec_bl_lbg, greatbritain_so_bl_lbg,
  greatbritain_lr_ches_lbg, greatbritain_eu_ches_lbg, greatbritain_ec_ches_lbg, greatbritain_so_ches_lbg,
  greatbritain_lr_emp_lbg, greatbritain_eu_emp_lbg, greatbritain_ec_emp_lbg, greatbritain_so_emp_lbg,
  greatbritain_lr_bl_mv, greatbritain_eu_bl_mv, greatbritain_ec_bl_mv, greatbritain_so_bl_mv,
  greatbritain_lr_ches_mv, greatbritain_eu_ches_mv, greatbritain_ec_ches_mv, greatbritain_so_ches_mv,
  greatbritain_lr_emp_mv, greatbritain_eu_emp_mv, greatbritain_ec_emp_mv, greatbritain_so_emp_mv
)
# The round trip through a matrix coerces all columns to one common type.
greatbritain_wordscores <- as.data.frame(as.matrix(greatbritain_wordscores))

# Write to the Wordscores folder by full path instead of changing the working
# directory there and back; the working directory stays on the Manifestos
# folder that the relative manifesto paths rely on.
write.csv(
  greatbritain_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "greatbritain_wordscores.csv")
)


################################################################################## Greece ################################################################################

# Greece: turn every manifesto PDF into a plain-text file next to it.
#
# Manifestos handled with OCR: pdf_convert() renders the pages to image files
# (two of them at an explicit, reduced dpi) and ocr() reads them with the
# "Greek" Tesseract engine.
# NOTE(review): "Greek" has to match the name of an installed Tesseract
# language/script data file -- confirm it is available on the machine.
ND_04 <- ocr(pdf_convert("Greece/ND_04.pdf", dpi = 50), engine = "Greek")
write(ND_04, file = "Greece/ND_04.txt")
ND_09 <- ocr(pdf_convert("Greece/ND_09.pdf", dpi = 40), engine = "Greek")
write(ND_09, file = "Greece/ND_09.txt")
PASOK_09 <- ocr(pdf_convert("Greece/PASOK_09.pdf"), engine = "Greek")
write(PASOK_09, file = "Greece/PASOK_09.txt")
SYRIZA_09 <- ocr(pdf_convert("Greece/SYRIZA_09.pdf"), engine = "Greek")
write(SYRIZA_09, file = "Greece/SYRIZA_09.txt")

# Manifestos whose text is read straight from the PDF with pdf_text().
KKE_04 <- pdf_text("Greece/KKE_04.pdf")
write(KKE_04, file = "Greece/KKE_04.txt")
PASOK_04 <- pdf_text("Greece/PASOK_04.pdf")
write(PASOK_04, file = "Greece/PASOK_04.txt")
SYRIZA_04 <- pdf_text("Greece/SYRIZA_04.pdf")
write(SYRIZA_04, file = "Greece/SYRIZA_04.txt")
KKE_09 <- pdf_text("Greece/KKE_09.pdf")
write(KKE_09, file = "Greece/KKE_09.txt")

# Read every .txt file in the Greece folder and build the raw
# document-feature matrix from the resulting corpus.
greece_texts <- readtext("Greece/*.txt")
greece_corpus <- corpus(greece_texts)
greece_dfm <- dfm(greece_corpus)
is.dfm(greece_dfm)

# Cleaning
#
# `greece` is the cleaned working copy; `greece_dfm` is left as built above.
# Features are lower-cased, then one dfm_select() pass per character class
# (in the order listed) removes every feature matching that class, and a last
# pass removes the Greek ("el") stopwords from the "stopwords-iso" source.
greece <- Reduce(
  function(current, char_class) {
    dfm_select(current, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(greece_dfm)
)
greece <- dfm_select(
  greece,
  stopwords(language = "el", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Per-document token and type counts, computed on the uncleaned `greece_dfm`.
greece_dfm_words <- as.data.frame(ntoken(greece_dfm))
greece_dfm_uniquewords <- as.data.frame(ntype(greece_dfm))

# Run Wordscores

# Greece

# Show the document names so the positional reference scores below can be
# checked against the document order. docnames() is quanteda's accessor for
# the names that were previously read straight from the dfm's Dimnames slot.
docnames(greece)

# Reference scores: one entry per document of `greece`, in document order.
# NA entries are documents without a reference score on that dimension.
# Vector names follow scores_<country>_<dimension>_<benchmark>.
scores_greece_lr_bl <- c(6.375, NA, 15.5625, NA, 10.4375, NA, 6.5, NA)
scores_greece_eu_bl <- c(1.8125, NA, 13.75, NA, 15.125, NA, 14.5625, NA)
scores_greece_ec_bl <- c(4.4285712, NA, 14.8, NA, 10.9375, NA, 6.5333333, NA)
scores_greece_so_bl <- c(10.466666, NA, 14, NA, 7.8125, NA, 4.8125, NA)

scores_greece_lr_ches <- c(0.8199999928474, NA, 6.269999980927, NA, 3.910000085831, NA, 2.859999895096, NA)
scores_greece_eu_ches <- c(1.360000014305, NA, 6.550000190735, NA, 6.820000171661, NA, 6, NA)
scores_greece_ec_ches <- c(0.58999997, NA, 6.3200002, NA, 4.6399999, NA, 2.9100001, NA)
scores_greece_so_ches <- c(6.0500002, NA, 6.4499998, NA, 3.8599999, NA, 2.0899999, NA)

scores_greece_lr_emp <- c(1, NA, 7, NA, 5, NA, 3, NA)
scores_greece_eu_emp <- c(1, NA, 10, NA, 9.99, NA, 7, NA)
scores_greece_ec_emp <- c(NA, NA, 9, NA, 7, NA, 3, NA)
scores_greece_so_emp <- c(5, NA, 2, NA, NA, NA, 1, NA)

# Fit one Wordscores model per score vector on the cleaned dfm.
ws_greece_lr_bl <- textmodel_wordscores(greece, scores_greece_lr_bl)
ws_greece_eu_bl <- textmodel_wordscores(greece, scores_greece_eu_bl)
ws_greece_ec_bl <- textmodel_wordscores(greece, scores_greece_ec_bl)
ws_greece_so_bl <- textmodel_wordscores(greece, scores_greece_so_bl)

ws_greece_lr_ches <- textmodel_wordscores(greece, scores_greece_lr_ches)
ws_greece_eu_ches <- textmodel_wordscores(greece, scores_greece_eu_ches)
ws_greece_ec_ches <- textmodel_wordscores(greece, scores_greece_ec_ches)
ws_greece_so_ches <- textmodel_wordscores(greece, scores_greece_so_ches)

ws_greece_lr_emp <- textmodel_wordscores(greece, scores_greece_lr_emp)
ws_greece_eu_emp <- textmodel_wordscores(greece, scores_greece_eu_emp)
ws_greece_ec_emp <- textmodel_wordscores(greece, scores_greece_ec_emp)
ws_greece_so_emp <- textmodel_wordscores(greece, scores_greece_so_emp)


# Predicted document scores for every fitted Greece model, once per rescaling
# ("lbg" and "mv"). Each result is a one-column data frame whose column is
# named <benchmark>_<dimension>_<rescaling> at creation time.
greece_lr_bl_lbg <- setNames(as.data.frame(predict(ws_greece_lr_bl, rescaling = "lbg")), "bl_lr_lbg")
greece_eu_bl_lbg <- setNames(as.data.frame(predict(ws_greece_eu_bl, rescaling = "lbg")), "bl_eu_lbg")
greece_ec_bl_lbg <- setNames(as.data.frame(predict(ws_greece_ec_bl, rescaling = "lbg")), "bl_ec_lbg")
greece_so_bl_lbg <- setNames(as.data.frame(predict(ws_greece_so_bl, rescaling = "lbg")), "bl_so_lbg")

greece_lr_ches_lbg <- setNames(as.data.frame(predict(ws_greece_lr_ches, rescaling = "lbg")), "ches_lr_lbg")
greece_eu_ches_lbg <- setNames(as.data.frame(predict(ws_greece_eu_ches, rescaling = "lbg")), "ches_eu_lbg")
greece_ec_ches_lbg <- setNames(as.data.frame(predict(ws_greece_ec_ches, rescaling = "lbg")), "ches_ec_lbg")
greece_so_ches_lbg <- setNames(as.data.frame(predict(ws_greece_so_ches, rescaling = "lbg")), "ches_so_lbg")

greece_lr_emp_lbg <- setNames(as.data.frame(predict(ws_greece_lr_emp, rescaling = "lbg")), "emp_lr_lbg")
greece_eu_emp_lbg <- setNames(as.data.frame(predict(ws_greece_eu_emp, rescaling = "lbg")), "emp_eu_lbg")
greece_ec_emp_lbg <- setNames(as.data.frame(predict(ws_greece_ec_emp, rescaling = "lbg")), "emp_ec_lbg")
greece_so_emp_lbg <- setNames(as.data.frame(predict(ws_greece_so_emp, rescaling = "lbg")), "emp_so_lbg")

greece_lr_bl_mv <- setNames(as.data.frame(predict(ws_greece_lr_bl, rescaling = "mv")), "bl_lr_mv")
greece_eu_bl_mv <- setNames(as.data.frame(predict(ws_greece_eu_bl, rescaling = "mv")), "bl_eu_mv")
greece_ec_bl_mv <- setNames(as.data.frame(predict(ws_greece_ec_bl, rescaling = "mv")), "bl_ec_mv")
greece_so_bl_mv <- setNames(as.data.frame(predict(ws_greece_so_bl, rescaling = "mv")), "bl_so_mv")

greece_lr_ches_mv <- setNames(as.data.frame(predict(ws_greece_lr_ches, rescaling = "mv")), "ches_lr_mv")
greece_eu_ches_mv <- setNames(as.data.frame(predict(ws_greece_eu_ches, rescaling = "mv")), "ches_eu_mv")
greece_ec_ches_mv <- setNames(as.data.frame(predict(ws_greece_ec_ches, rescaling = "mv")), "ches_ec_mv")
greece_so_ches_mv <- setNames(as.data.frame(predict(ws_greece_so_ches, rescaling = "mv")), "ches_so_mv")

greece_lr_emp_mv <- setNames(as.data.frame(predict(ws_greece_lr_emp, rescaling = "mv")), "emp_lr_mv")
greece_eu_emp_mv <- setNames(as.data.frame(predict(ws_greece_eu_emp, rescaling = "mv")), "emp_eu_mv")
greece_ec_emp_mv <- setNames(as.data.frame(predict(ws_greece_ec_emp, rescaling = "mv")), "emp_ec_mv")
greece_so_emp_mv <- setNames(as.data.frame(predict(ws_greece_so_emp, rescaling = "mv")), "emp_so_mv")

# Combine the 24 columns (12 "lbg" followed by 12 "mv").
greece_wordscores <- cbind(
  greece_lr_bl_lbg, greece_eu_bl_lbg, greece_ec_bl_lbg, greece_so_bl_lbg,
  greece_lr_ches_lbg, greece_eu_ches_lbg, greece_ec_ches_lbg, greece_so_ches_lbg,
  greece_lr_emp_lbg, greece_eu_emp_lbg, greece_ec_emp_lbg, greece_so_emp_lbg,
  greece_lr_bl_mv, greece_eu_bl_mv, greece_ec_bl_mv, greece_so_bl_mv,
  greece_lr_ches_mv, greece_eu_ches_mv, greece_ec_ches_mv, greece_so_ches_mv,
  greece_lr_emp_mv, greece_eu_emp_mv, greece_ec_emp_mv, greece_so_emp_mv
)
# The round trip through a matrix coerces all columns to one common type.
greece_wordscores <- as.data.frame(as.matrix(greece_wordscores))

# Write to the Wordscores folder by full path instead of changing the working
# directory there and back; the working directory stays on the Manifestos
# folder that the relative manifesto paths rely on.
write.csv(
  greece_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "greece_wordscores.csv")
)


################################################################################## Hungary ################################################################################

# Turn every Hungarian manifesto PDF into a plain-text file next to it.
# Each document is extracted and written in one step.

# These two 2009 manifestos go through OCR: pdf_convert() renders the pages
# to image files and ocr() reads them with the "hun" engine.
MSZP_09 <- ocr(pdf_convert("Hungary/MSZP_09.pdf"), engine = "hun")
write(MSZP_09, file = "Hungary/MSZP_09.txt")
SZDSZ_09 <- ocr(pdf_convert("Hungary/SZDSZ_09.pdf"), engine = "hun")
write(SZDSZ_09, file = "Hungary/SZDSZ_09.txt")

# The remaining manifestos have their text read directly with pdf_text().
# 2004
FIDESZ_04 <- pdf_text("Hungary/FIDESZ_04.pdf")
write(FIDESZ_04, file = "Hungary/FIDESZ_04.txt")
MDF_04 <- pdf_text("Hungary/MDF_04.pdf")
write(MDF_04, file = "Hungary/MDF_04.txt")
MSZP_04 <- pdf_text("Hungary/MSZP_04.pdf")
write(MSZP_04, file = "Hungary/MSZP_04.txt")
SZDSZ_04 <- pdf_text("Hungary/SZDSZ_04.pdf")
write(SZDSZ_04, file = "Hungary/SZDSZ_04.txt")

# 2009
FIDESZ_09 <- pdf_text("Hungary/FIDESZ_09.pdf")
write(FIDESZ_09, file = "Hungary/FIDESZ_09.txt")
MDF_09 <- pdf_text("Hungary/MDF_09.pdf")
write(MDF_09, file = "Hungary/MDF_09.txt")

# Read every .txt file in the Hungary folder into a corpus and build the raw
# document-feature matrix.
hungary_texts <- readtext("Hungary/*.txt")
hungary_corpus <- corpus(hungary_texts)
hungary_dfm <- dfm(hungary_corpus)
is.dfm(hungary_dfm)

# Cleaning

# Clean a lower-cased copy; hungary_dfm itself is left as built.
hungary <- dfm_tolower(hungary_dfm)

# Remove every feature that matches one of these character classes,
# one class per pass.
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  hungary <- dfm_select(
    hungary,
    pattern = char_class, selection = "remove",
    valuetype = "regex", verbose = TRUE
  )
}

# Remove Hungarian stop words (stopwords-iso list), matched as fixed strings.
hungary <- dfm_select(
  hungary,
  pattern = stopwords(language = "hu", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document. These are taken from the raw matrix
# (hungary_dfm), not from the cleaned one.
hungary_dfm_words <- as.data.frame(ntoken(hungary_dfm))
hungary_dfm_uniquewords <- as.data.frame(ntype(hungary_dfm))

# Run Wordscores

# Hungary

# Show the document order: the score vectors are matched to documents by
# position.
hungary@Dimnames$docs
# Reference scores for the eight Hungarian documents, one vector per source
# (bl, ches, emp) and dimension (lr, eu, ec, so). Positions follow the
# document order of the dfm; NA marks a document without a reference score.
# Presumably the *_04 manifestos carry the scores and the *_09 ones are the
# texts to be scored - confirm against the printed document names.
# No bl/eu vector is defined for Hungary.
scores_hungary_lr_bl <- c(15.09523773193, NA, 13.625, NA, 7.023809432983, NA, 8.243902206421, NA)
scores_hungary_ec_bl <- c(9.2926826, NA, 9.9459457, NA, 10.463414, NA, 15.170732, NA)
scores_hungary_so_bl <- c(15.121951, NA, 14.925, NA, 7.4390244, NA, 2.3, NA)

scores_hungary_lr_ches <- c(7.639999866486, NA, 6.429999828339, NA, 3.390000104904, NA, 4.210000038147, NA)
scores_hungary_eu_ches <- c(4.639999866486, NA, 5.860000133514, NA, 6.860000133514, NA, 6.87, NA)
scores_hungary_ec_ches <- c(4.6199999, NA, 5.1199999, NA, 5.23, NA, 6.9200001, NA)
scores_hungary_so_ches <- c(8.1499996, NA, 7.3499999, NA, 3.3800001, NA, 1.46, NA)

scores_hungary_lr_emp <- c(6, NA, 6.001, NA, 7, NA, 7.001, NA)
scores_hungary_eu_emp <- c(9.99, NA, 9.98, NA, 10, NA, 9, NA)
scores_hungary_ec_emp <- c(8, NA, 7, NA, 7.01, NA, 8.01, NA)
scores_hungary_so_emp <- c(4, NA, 5, NA, 6, NA, 2, NA)

# Fit one Wordscores model per source (bl, ches, emp) and dimension
# (lr, eu, ec, so). No bl/eu model is fitted for Hungary.
ws_hungary_lr_bl <- textmodel_wordscores(hungary, y = scores_hungary_lr_bl)
ws_hungary_ec_bl <- textmodel_wordscores(hungary, y = scores_hungary_ec_bl)
ws_hungary_so_bl <- textmodel_wordscores(hungary, y = scores_hungary_so_bl)

ws_hungary_lr_ches <- textmodel_wordscores(hungary, y = scores_hungary_lr_ches)
ws_hungary_eu_ches <- textmodel_wordscores(hungary, y = scores_hungary_eu_ches)
ws_hungary_ec_ches <- textmodel_wordscores(hungary, y = scores_hungary_ec_ches)
ws_hungary_so_ches <- textmodel_wordscores(hungary, y = scores_hungary_so_ches)

ws_hungary_lr_emp <- textmodel_wordscores(hungary, y = scores_hungary_lr_emp)
ws_hungary_eu_emp <- textmodel_wordscores(hungary, y = scores_hungary_eu_emp)
ws_hungary_ec_emp <- textmodel_wordscores(hungary, y = scores_hungary_ec_emp)
ws_hungary_so_emp <- textmodel_wordscores(hungary, y = scores_hungary_so_emp)

# Predict document scores from `model` with the given rescaling and return
# them as a data frame whose first column is named `column`.
ws_column <- function(model, rescaling, column) {
  predicted <- as.data.frame(predict(model, rescaling = rescaling))
  names(predicted)[1] <- column
  predicted
}

# LBG-rescaled predictions. The bl/eu slot is an all-NA placeholder with one
# row per document, so the combined table keeps the same columns.
hungary_lr_bl_lbg <- ws_column(ws_hungary_lr_bl, "lbg", "bl_lr_lbg")
hungary_eu_bl_lbg <- data.frame(bl_eu_lbg = rep(NA, 8), stringsAsFactors = FALSE)
hungary_ec_bl_lbg <- ws_column(ws_hungary_ec_bl, "lbg", "bl_ec_lbg")
hungary_so_bl_lbg <- ws_column(ws_hungary_so_bl, "lbg", "bl_so_lbg")

hungary_lr_ches_lbg <- ws_column(ws_hungary_lr_ches, "lbg", "ches_lr_lbg")
hungary_eu_ches_lbg <- ws_column(ws_hungary_eu_ches, "lbg", "ches_eu_lbg")
hungary_ec_ches_lbg <- ws_column(ws_hungary_ec_ches, "lbg", "ches_ec_lbg")
hungary_so_ches_lbg <- ws_column(ws_hungary_so_ches, "lbg", "ches_so_lbg")

hungary_lr_emp_lbg <- ws_column(ws_hungary_lr_emp, "lbg", "emp_lr_lbg")
hungary_eu_emp_lbg <- ws_column(ws_hungary_eu_emp, "lbg", "emp_eu_lbg")
hungary_ec_emp_lbg <- ws_column(ws_hungary_ec_emp, "lbg", "emp_ec_lbg")
hungary_so_emp_lbg <- ws_column(ws_hungary_so_emp, "lbg", "emp_so_lbg")

# MV-rescaled predictions, same layout.
hungary_lr_bl_mv <- ws_column(ws_hungary_lr_bl, "mv", "bl_lr_mv")
hungary_eu_bl_mv <- data.frame(bl_eu_mv = rep(NA, 8), stringsAsFactors = FALSE)
hungary_ec_bl_mv <- ws_column(ws_hungary_ec_bl, "mv", "bl_ec_mv")
hungary_so_bl_mv <- ws_column(ws_hungary_so_bl, "mv", "bl_so_mv")

hungary_lr_ches_mv <- ws_column(ws_hungary_lr_ches, "mv", "ches_lr_mv")
hungary_eu_ches_mv <- ws_column(ws_hungary_eu_ches, "mv", "ches_eu_mv")
hungary_ec_ches_mv <- ws_column(ws_hungary_ec_ches, "mv", "ches_ec_mv")
hungary_so_ches_mv <- ws_column(ws_hungary_so_ches, "mv", "ches_so_mv")

hungary_lr_emp_mv <- ws_column(ws_hungary_lr_emp, "mv", "emp_lr_mv")
hungary_eu_emp_mv <- ws_column(ws_hungary_eu_emp, "mv", "emp_eu_mv")
hungary_ec_emp_mv <- ws_column(ws_hungary_ec_emp, "mv", "emp_ec_mv")
hungary_so_emp_mv <- ws_column(ws_hungary_so_emp, "mv", "emp_so_mv")

# Bind the 24 Hungarian prediction frames (12 LBG-rescaled, then 12
# MV-rescaled) into one table and save it as CSV.
hungary_wordscores <- cbind(
  hungary_lr_bl_lbg, hungary_eu_bl_lbg, hungary_ec_bl_lbg, hungary_so_bl_lbg,
  hungary_lr_ches_lbg, hungary_eu_ches_lbg, hungary_ec_ches_lbg, hungary_so_ches_lbg,
  hungary_lr_emp_lbg, hungary_eu_emp_lbg, hungary_ec_emp_lbg, hungary_so_emp_lbg,
  hungary_lr_bl_mv, hungary_eu_bl_mv, hungary_ec_bl_mv, hungary_so_bl_mv,
  hungary_lr_ches_mv, hungary_eu_ches_mv, hungary_ec_ches_mv, hungary_so_ches_mv,
  hungary_lr_emp_mv, hungary_eu_emp_mv, hungary_ec_emp_mv, hungary_so_emp_mv
)
# Round trip through a matrix so that all columns, including the all-NA
# bl/eu placeholders, share one common type.
hungary_wordscores <- as.data.frame(as.matrix(hungary_wordscores))

# Write by full path instead of changing into the output folder, and create
# the folder first so the script does not stop here when it is missing.
wordscores_dir <- path.expand("~/Downloads/Replication Files/Wordscores")
dir.create(wordscores_dir, showWarnings = FALSE, recursive = TRUE)
write.csv(hungary_wordscores, file = file.path(wordscores_dir, "hungary_wordscores.csv"))

# Kept from the original: the following country sections read their files
# relative to the Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Ireland ################################################################################

# Extract the text of every Irish manifesto PDF with pdf_text() and write it
# to a .txt file next to the PDF, one document at a time.

# 2004
FF_04 <- pdf_text("Ireland/FF_04.pdf")
write(FF_04, file = "Ireland/FF_04.txt")
FG_04 <- pdf_text("Ireland/FG_04.pdf")
write(FG_04, file = "Ireland/FG_04.txt")
GREEN_04 <- pdf_text("Ireland/GREEN_04.pdf")
write(GREEN_04, file = "Ireland/GREEN_04.txt")
LABOUR_04 <- pdf_text("Ireland/LABOUR_04.pdf")
write(LABOUR_04, file = "Ireland/LABOUR_04.txt")
SF_04 <- pdf_text("Ireland/SF_04.pdf")
write(SF_04, file = "Ireland/SF_04.txt")

# 2009
FF_09 <- pdf_text("Ireland/FF_09.pdf")
write(FF_09, file = "Ireland/FF_09.txt")
FG_09 <- pdf_text("Ireland/FG_09.pdf")
write(FG_09, file = "Ireland/FG_09.txt")
GREEN_09 <- pdf_text("Ireland/GREEN_09.pdf")
write(GREEN_09, file = "Ireland/GREEN_09.txt")
LABOUR_09 <- pdf_text("Ireland/LABOUR_09.pdf")
write(LABOUR_09, file = "Ireland/LABOUR_09.txt")
SF_09 <- pdf_text("Ireland/SF_09.pdf")
write(SF_09, file = "Ireland/SF_09.txt")

# Read every .txt file in the Ireland folder into a corpus and build the raw
# document-feature matrix.
ireland_texts <- readtext("Ireland/*.txt")
ireland_corpus <- corpus(ireland_texts)
ireland_dfm <- dfm(ireland_corpus)
is.dfm(ireland_dfm)

# Cleaning

# Clean a lower-cased copy; ireland_dfm itself is left as built.
ireland <- dfm_tolower(ireland_dfm)

# Remove every feature that matches one of these character classes,
# one class per pass.
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  ireland <- dfm_select(
    ireland,
    pattern = char_class, selection = "remove",
    valuetype = "regex", verbose = TRUE
  )
}

# Remove English stop words (stopwords-iso list), matched as fixed strings.
ireland <- dfm_select(
  ireland,
  pattern = stopwords(language = "en", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document. These are taken from the raw matrix
# (ireland_dfm), not from the cleaned one.
ireland_dfm_words <- as.data.frame(ntoken(ireland_dfm))
ireland_dfm_uniquewords <- as.data.frame(ntype(ireland_dfm))

# Run Wordscores

# Ireland

# Show the document order: the score vectors are matched to documents by
# position.
ireland@Dimnames$docs
# Reference scores for the ten Irish documents, one vector per source
# (bl, ches, emp) and dimension (lr, eu, ec, so). Positions follow the
# document order of the dfm; NA marks a document without a reference score.
# Presumably the *_04 manifestos carry the scores and the *_09 ones are the
# texts to be scored - confirm against the printed document names.
scores_ireland_lr_bl <- c(13.28301906586, NA, 12.73584938049, NA, 5.705882549286, NA, 7.377358436584, NA, 6.307692527771, NA)
scores_ireland_eu_bl <- c(8.3076897, NA, 12.692307, NA, 3.94118, NA, 10.77359, NA, 3.96154, NA)
scores_ireland_ec_bl <- c(13.754717, NA, 12.45283, NA, 5.7843137, NA, 6.5961537, NA, 4.8600001, NA)
scores_ireland_so_bl <- c(14.81132, NA, 11.471698, NA, 5.6199999, NA, 5.981132, NA, 9.5510206, NA)

scores_ireland_lr_ches <- c(5.630000114441, NA, 5.880000114441, NA, 2.130000114441, NA, 3.25, NA, 2.75, NA)
scores_ireland_eu_ches <- c(5.630000114441, NA, 6.630000114441, NA, 2.75, NA, 5.130000114441, NA, 2, NA)
scores_ireland_ec_ches <- c(5.25, NA, 5.3800001, NA, 2.1300001, NA, 3, NA, 1.75, NA)
scores_ireland_so_ches <- c(7.1300001, NA, 6.25, NA, 2.3800001, NA, 3.75, NA, 6, NA)

scores_ireland_lr_emp <- c(7, NA, 8, NA, 3.001, NA, 4, NA, 3, NA)
scores_ireland_eu_emp <- c(8, NA, 6, NA, 4, NA, 7, NA, 3, NA)
scores_ireland_ec_emp <- c(5, NA, 7, NA, 3, NA, 4, NA, 3, NA)
scores_ireland_so_emp <- c(6.001, NA, 6, NA, 3, NA, 3.001, NA, 2.99, NA)

# Fit one Wordscores model per source (bl, ches, emp) and dimension
# (lr, eu, ec, so).
ws_ireland_lr_bl <- textmodel_wordscores(ireland, y = scores_ireland_lr_bl)
ws_ireland_eu_bl <- textmodel_wordscores(ireland, y = scores_ireland_eu_bl)
ws_ireland_ec_bl <- textmodel_wordscores(ireland, y = scores_ireland_ec_bl)
ws_ireland_so_bl <- textmodel_wordscores(ireland, y = scores_ireland_so_bl)

ws_ireland_lr_ches <- textmodel_wordscores(ireland, y = scores_ireland_lr_ches)
ws_ireland_eu_ches <- textmodel_wordscores(ireland, y = scores_ireland_eu_ches)
ws_ireland_ec_ches <- textmodel_wordscores(ireland, y = scores_ireland_ec_ches)
ws_ireland_so_ches <- textmodel_wordscores(ireland, y = scores_ireland_so_ches)

ws_ireland_lr_emp <- textmodel_wordscores(ireland, y = scores_ireland_lr_emp)
ws_ireland_eu_emp <- textmodel_wordscores(ireland, y = scores_ireland_eu_emp)
ws_ireland_ec_emp <- textmodel_wordscores(ireland, y = scores_ireland_ec_emp)
ws_ireland_so_emp <- textmodel_wordscores(ireland, y = scores_ireland_so_emp)

# Predict document scores from `model` with the given rescaling and return
# them as a data frame whose first column is named `column`.
ws_column <- function(model, rescaling, column) {
  predicted <- as.data.frame(predict(model, rescaling = rescaling))
  names(predicted)[1] <- column
  predicted
}

# LBG-rescaled predictions.
ireland_lr_bl_lbg <- ws_column(ws_ireland_lr_bl, "lbg", "bl_lr_lbg")
ireland_eu_bl_lbg <- ws_column(ws_ireland_eu_bl, "lbg", "bl_eu_lbg")
ireland_ec_bl_lbg <- ws_column(ws_ireland_ec_bl, "lbg", "bl_ec_lbg")
ireland_so_bl_lbg <- ws_column(ws_ireland_so_bl, "lbg", "bl_so_lbg")

ireland_lr_ches_lbg <- ws_column(ws_ireland_lr_ches, "lbg", "ches_lr_lbg")
ireland_eu_ches_lbg <- ws_column(ws_ireland_eu_ches, "lbg", "ches_eu_lbg")
ireland_ec_ches_lbg <- ws_column(ws_ireland_ec_ches, "lbg", "ches_ec_lbg")
ireland_so_ches_lbg <- ws_column(ws_ireland_so_ches, "lbg", "ches_so_lbg")

ireland_lr_emp_lbg <- ws_column(ws_ireland_lr_emp, "lbg", "emp_lr_lbg")
ireland_eu_emp_lbg <- ws_column(ws_ireland_eu_emp, "lbg", "emp_eu_lbg")
ireland_ec_emp_lbg <- ws_column(ws_ireland_ec_emp, "lbg", "emp_ec_lbg")
ireland_so_emp_lbg <- ws_column(ws_ireland_so_emp, "lbg", "emp_so_lbg")

# MV-rescaled predictions, same layout.
ireland_lr_bl_mv <- ws_column(ws_ireland_lr_bl, "mv", "bl_lr_mv")
ireland_eu_bl_mv <- ws_column(ws_ireland_eu_bl, "mv", "bl_eu_mv")
ireland_ec_bl_mv <- ws_column(ws_ireland_ec_bl, "mv", "bl_ec_mv")
ireland_so_bl_mv <- ws_column(ws_ireland_so_bl, "mv", "bl_so_mv")

ireland_lr_ches_mv <- ws_column(ws_ireland_lr_ches, "mv", "ches_lr_mv")
ireland_eu_ches_mv <- ws_column(ws_ireland_eu_ches, "mv", "ches_eu_mv")
ireland_ec_ches_mv <- ws_column(ws_ireland_ec_ches, "mv", "ches_ec_mv")
ireland_so_ches_mv <- ws_column(ws_ireland_so_ches, "mv", "ches_so_mv")

ireland_lr_emp_mv <- ws_column(ws_ireland_lr_emp, "mv", "emp_lr_mv")
ireland_eu_emp_mv <- ws_column(ws_ireland_eu_emp, "mv", "emp_eu_mv")
ireland_ec_emp_mv <- ws_column(ws_ireland_ec_emp, "mv", "emp_ec_mv")
ireland_so_emp_mv <- ws_column(ws_ireland_so_emp, "mv", "emp_so_mv")

# Bind the 24 Irish prediction frames (12 LBG-rescaled, then 12 MV-rescaled)
# into one table and save it as CSV.
ireland_wordscores <- cbind(
  ireland_lr_bl_lbg, ireland_eu_bl_lbg, ireland_ec_bl_lbg, ireland_so_bl_lbg,
  ireland_lr_ches_lbg, ireland_eu_ches_lbg, ireland_ec_ches_lbg, ireland_so_ches_lbg,
  ireland_lr_emp_lbg, ireland_eu_emp_lbg, ireland_ec_emp_lbg, ireland_so_emp_lbg,
  ireland_lr_bl_mv, ireland_eu_bl_mv, ireland_ec_bl_mv, ireland_so_bl_mv,
  ireland_lr_ches_mv, ireland_eu_ches_mv, ireland_ec_ches_mv, ireland_so_ches_mv,
  ireland_lr_emp_mv, ireland_eu_emp_mv, ireland_ec_emp_mv, ireland_so_emp_mv
)
# Round trip through a matrix so that all columns share one common type.
ireland_wordscores <- as.data.frame(as.matrix(ireland_wordscores))

# Write by full path instead of changing into the output folder, and create
# the folder first so the script does not stop here when it is missing.
wordscores_dir <- path.expand("~/Downloads/Replication Files/Wordscores")
dir.create(wordscores_dir, showWarnings = FALSE, recursive = TRUE)
write.csv(ireland_wordscores, file = file.path(wordscores_dir, "ireland_wordscores.csv"))

# Kept from the original: the following country sections read their files
# relative to the Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")



################################################################################## Italy ################################################################################

# Extract the text of every Italian manifesto PDF with pdf_text() and write
# it to a .txt file next to the PDF, one document at a time.

# 2004
FI_04 <- pdf_text("Italy/FI_04.pdf")
write(FI_04, file = "Italy/FI_04.txt")
LN_04 <- pdf_text("Italy/LN_04.pdf")
write(LN_04, file = "Italy/LN_04.txt")
PD_04 <- pdf_text("Italy/PD_04.pdf")
write(PD_04, file = "Italy/PD_04.txt")
PRC_04 <- pdf_text("Italy/PRC_04.pdf")
write(PRC_04, file = "Italy/PRC_04.txt")

# 2009
FI_09 <- pdf_text("Italy/FI_09.pdf")
write(FI_09, file = "Italy/FI_09.txt")
LN_09 <- pdf_text("Italy/LN_09.pdf")
write(LN_09, file = "Italy/LN_09.txt")
PD_09 <- pdf_text("Italy/PD_09.pdf")
write(PD_09, file = "Italy/PD_09.txt")
PRC_09 <- pdf_text("Italy/PRC_09.pdf")
write(PRC_09, file = "Italy/PRC_09.txt")

# Read every .txt file in the Italy folder into a corpus and build the raw
# document-feature matrix.
italy_texts <- readtext("Italy/*.txt")
italy_corpus <- corpus(italy_texts)
italy_dfm <- dfm(italy_corpus)
is.dfm(italy_dfm)

# Cleaning

# Clean a lower-cased copy; italy_dfm itself is left as built.
italy <- dfm_tolower(italy_dfm)

# Remove every feature that matches one of these character classes,
# one class per pass.
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  italy <- dfm_select(
    italy,
    pattern = char_class, selection = "remove",
    valuetype = "regex", verbose = TRUE
  )
}

# Remove Italian stop words (stopwords-iso list), matched as fixed strings.
italy <- dfm_select(
  italy,
  pattern = stopwords(language = "it", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document. These are taken from the raw matrix
# (italy_dfm), not from the cleaned one.
italy_dfm_words <- as.data.frame(ntoken(italy_dfm))
italy_dfm_uniquewords <- as.data.frame(ntype(italy_dfm))

# Run Wordscores

# Italy

# Show the document order: the score vectors are matched to documents by
# position.
italy@Dimnames$docs
# Reference scores for the eight Italian documents, one vector per source
# (bl, ches, emp) and dimension (lr, eu, ec, so). Positions follow the
# document order of the dfm; NA marks a document without a reference score.
# Presumably the *_04 manifestos carry the scores and the *_09 ones are the
# texts to be scored - confirm against the printed document names.
# In the emp/so vector the seventh document has no score either.
scores_italy_lr_bl <- c(15.59259223938, NA, 16.8867931366, NA, 5.981132030487, NA, 2.148148059845, NA)
scores_italy_eu_bl <- c(6.3846102, NA, 3.0576899, NA, 15.884615, NA, 10.34694, NA)
scores_italy_ec_bl <- c(17.489361, NA, 15.063829, NA, 6.6666665, NA, 2.9166667, NA)
scores_italy_so_bl <- c(12.875, NA, 17.063829, NA, 4.9791665, NA, 3.6875, NA)

scores_italy_lr_ches <- c(6.929999828339, NA, 7.710000038147, NA, 3.140000104904, NA, 1.929999947548, NA)
scores_italy_eu_ches <- c(4.619999885559, NA, 2.30999994278, NA, 6.460000038147, NA, 3.539999961853, NA)
scores_italy_ec_ches <- c(7.9299998, NA, 7.2800002, NA, 3.4200001, NA, 0.70999998, NA)
scores_italy_so_ches <- c(6.3099999, NA, 8.2299995, NA, 2.54, NA, 2.6199999, NA)

scores_italy_lr_emp <- c(9, NA, 8, NA, 3, NA, 1, NA)
scores_italy_eu_emp <- c(9, NA, 4, NA, 10, NA, 2, NA)
scores_italy_ec_emp <- c(9, NA, 7, NA, 4, NA, 2, NA)
scores_italy_so_emp <- c(3, NA, 8, NA, 3.001, NA, NA, NA)

# Fit one Wordscores model per source (bl, ches, emp) and dimension
# (lr, eu, ec, so).
ws_italy_lr_bl <- textmodel_wordscores(italy, y = scores_italy_lr_bl)
ws_italy_eu_bl <- textmodel_wordscores(italy, y = scores_italy_eu_bl)
ws_italy_ec_bl <- textmodel_wordscores(italy, y = scores_italy_ec_bl)
ws_italy_so_bl <- textmodel_wordscores(italy, y = scores_italy_so_bl)

ws_italy_lr_ches <- textmodel_wordscores(italy, y = scores_italy_lr_ches)
ws_italy_eu_ches <- textmodel_wordscores(italy, y = scores_italy_eu_ches)
ws_italy_ec_ches <- textmodel_wordscores(italy, y = scores_italy_ec_ches)
ws_italy_so_ches <- textmodel_wordscores(italy, y = scores_italy_so_ches)

ws_italy_lr_emp <- textmodel_wordscores(italy, y = scores_italy_lr_emp)
ws_italy_eu_emp <- textmodel_wordscores(italy, y = scores_italy_eu_emp)
ws_italy_ec_emp <- textmodel_wordscores(italy, y = scores_italy_ec_emp)
ws_italy_so_emp <- textmodel_wordscores(italy, y = scores_italy_so_emp)

# Predict document scores from `model` with the given rescaling and return
# them as a data frame whose first column is named `column`.
ws_column <- function(model, rescaling, column) {
  predicted <- as.data.frame(predict(model, rescaling = rescaling))
  names(predicted)[1] <- column
  predicted
}

# LBG-rescaled predictions.
italy_lr_bl_lbg <- ws_column(ws_italy_lr_bl, "lbg", "bl_lr_lbg")
italy_eu_bl_lbg <- ws_column(ws_italy_eu_bl, "lbg", "bl_eu_lbg")
italy_ec_bl_lbg <- ws_column(ws_italy_ec_bl, "lbg", "bl_ec_lbg")
italy_so_bl_lbg <- ws_column(ws_italy_so_bl, "lbg", "bl_so_lbg")

italy_lr_ches_lbg <- ws_column(ws_italy_lr_ches, "lbg", "ches_lr_lbg")
italy_eu_ches_lbg <- ws_column(ws_italy_eu_ches, "lbg", "ches_eu_lbg")
italy_ec_ches_lbg <- ws_column(ws_italy_ec_ches, "lbg", "ches_ec_lbg")
italy_so_ches_lbg <- ws_column(ws_italy_so_ches, "lbg", "ches_so_lbg")

italy_lr_emp_lbg <- ws_column(ws_italy_lr_emp, "lbg", "emp_lr_lbg")
italy_eu_emp_lbg <- ws_column(ws_italy_eu_emp, "lbg", "emp_eu_lbg")
italy_ec_emp_lbg <- ws_column(ws_italy_ec_emp, "lbg", "emp_ec_lbg")
italy_so_emp_lbg <- ws_column(ws_italy_so_emp, "lbg", "emp_so_lbg")

# MV-rescaled predictions, same layout.
italy_lr_bl_mv <- ws_column(ws_italy_lr_bl, "mv", "bl_lr_mv")
italy_eu_bl_mv <- ws_column(ws_italy_eu_bl, "mv", "bl_eu_mv")
italy_ec_bl_mv <- ws_column(ws_italy_ec_bl, "mv", "bl_ec_mv")
italy_so_bl_mv <- ws_column(ws_italy_so_bl, "mv", "bl_so_mv")

italy_lr_ches_mv <- ws_column(ws_italy_lr_ches, "mv", "ches_lr_mv")
italy_eu_ches_mv <- ws_column(ws_italy_eu_ches, "mv", "ches_eu_mv")
italy_ec_ches_mv <- ws_column(ws_italy_ec_ches, "mv", "ches_ec_mv")
italy_so_ches_mv <- ws_column(ws_italy_so_ches, "mv", "ches_so_mv")

italy_lr_emp_mv <- ws_column(ws_italy_lr_emp, "mv", "emp_lr_mv")
italy_eu_emp_mv <- ws_column(ws_italy_eu_emp, "mv", "emp_eu_mv")
italy_ec_emp_mv <- ws_column(ws_italy_ec_emp, "mv", "emp_ec_mv")
italy_so_emp_mv <- ws_column(ws_italy_so_emp, "mv", "emp_so_mv")

# Bind the 24 Italian prediction frames (12 LBG-rescaled, then 12
# MV-rescaled) into one table and save it as CSV.
italy_wordscores <- cbind(
  italy_lr_bl_lbg, italy_eu_bl_lbg, italy_ec_bl_lbg, italy_so_bl_lbg,
  italy_lr_ches_lbg, italy_eu_ches_lbg, italy_ec_ches_lbg, italy_so_ches_lbg,
  italy_lr_emp_lbg, italy_eu_emp_lbg, italy_ec_emp_lbg, italy_so_emp_lbg,
  italy_lr_bl_mv, italy_eu_bl_mv, italy_ec_bl_mv, italy_so_bl_mv,
  italy_lr_ches_mv, italy_eu_ches_mv, italy_ec_ches_mv, italy_so_ches_mv,
  italy_lr_emp_mv, italy_eu_emp_mv, italy_ec_emp_mv, italy_so_emp_mv
)
# Round trip through a matrix so that all columns share one common type.
italy_wordscores <- as.data.frame(as.matrix(italy_wordscores))

# Write by full path instead of changing into the output folder, and create
# the folder first so the script does not stop here when it is missing.
wordscores_dir <- path.expand("~/Downloads/Replication Files/Wordscores")
dir.create(wordscores_dir, showWarnings = FALSE, recursive = TRUE)
write.csv(italy_wordscores, file = file.path(wordscores_dir, "italy_wordscores.csv"))

# Kept from the original: the following country sections read their files
# relative to the Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Latvia ################################################################################

# Extract the text of every Latvian manifesto PDF with pdf_text() and write
# it to a .txt file next to the PDF, one document at a time.

# 2004
JL_04 <- pdf_text("Latvia/JL_04.pdf")
write(JL_04, file = "Latvia/JL_04.txt")
LKS_04 <- pdf_text("Latvia/LKS_04.pdf")
write(LKS_04, file = "Latvia/LKS_04.txt")
LPPLC_04 <- pdf_text("Latvia/LPPLC_04.pdf")
write(LPPLC_04, file = "Latvia/LPPLC_04.txt")
TBLNNK_04 <- pdf_text("Latvia/TBLNNK_04.pdf")
write(TBLNNK_04, file = "Latvia/TBLNNK_04.txt")
TP_04 <- pdf_text("Latvia/TP_04.pdf")
write(TP_04, file = "Latvia/TP_04.txt")

# 2009
JL_09 <- pdf_text("Latvia/JL_09.pdf")
write(JL_09, file = "Latvia/JL_09.txt")
LKS_09 <- pdf_text("Latvia/LKS_09.pdf")
write(LKS_09, file = "Latvia/LKS_09.txt")
LPPLC_09 <- pdf_text("Latvia/LPPLC_09.pdf")
write(LPPLC_09, file = "Latvia/LPPLC_09.txt")
TBLNNK_09 <- pdf_text("Latvia/TBLNNK_09.pdf")
write(TBLNNK_09, file = "Latvia/TBLNNK_09.txt")
TP_09 <- pdf_text("Latvia/TP_09.pdf")
write(TP_09, file = "Latvia/TP_09.txt")

# Read every .txt file in the Latvia folder into a corpus and build the raw
# document-feature matrix.
latvia_texts <- readtext("Latvia/*.txt")
latvia_corpus <- corpus(latvia_texts)
latvia_dfm <- dfm(latvia_corpus)
is.dfm(latvia_dfm)

# Cleaning

# Clean a lower-cased copy; latvia_dfm itself is left as built.
latvia <- dfm_tolower(latvia_dfm)

# Remove every feature that matches one of these character classes,
# one class per pass.
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  latvia <- dfm_select(
    latvia,
    pattern = char_class, selection = "remove",
    valuetype = "regex", verbose = TRUE
  )
}

# Remove Latvian stop words (stopwords-iso list), matched as fixed strings.
latvia <- dfm_select(
  latvia,
  pattern = stopwords(language = "lv", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document. These are taken from the raw matrix
# (latvia_dfm), not from the cleaned one.
latvia_dfm_words <- as.data.frame(ntoken(latvia_dfm))
latvia_dfm_uniquewords <- as.data.frame(ntype(latvia_dfm))

# Run Wordscores

# Latvia

# Show the document order: the score vectors are matched to documents by
# position.
latvia@Dimnames$docs
# Reference scores for the ten Latvian documents, one vector per source
# (bl, ches, emp) and dimension (lr, eu, ec, so). Positions follow the
# document order of the dfm; NA marks a document without a reference score.
# Presumably the *_04 manifestos carry the scores and the *_09 ones are the
# texts to be scored - confirm against the printed document names.
# No bl/eu vector is defined for Latvia, and the emp/ec and emp/so vectors
# leave some additional documents unscored.
scores_latvia_lr_bl <- c(16.5, NA, 3.375, NA, 13.375, NA, 16.25, NA, 18.125, NA)
scores_latvia_ec_bl <- c(14.75, NA, 7, NA, 12.625, NA, 12.875, NA, 14.875, NA)
scores_latvia_so_bl <- c(11.625, NA, 12.25, NA, 18.375, NA, 14.5, NA, 13.625, NA)

scores_latvia_lr_ches <- c(6.75, NA, 1.25, NA, 5.5, NA, 7.75, NA, 7, NA)
scores_latvia_eu_ches <- c(6.5, NA, 4.25, NA, 5.75, NA, 5.25, NA, 6.25, NA)
scores_latvia_ec_ches <- c(7, NA, 1.67, NA, 6.6700001, NA, 5.3299999, NA, 8.3299999, NA)
scores_latvia_so_ches <- c(6.3299999, NA, 6.3299999, NA, 8.3299999, NA, 6.6700001, NA, 6, NA)

scores_latvia_lr_emp <- c(7.0001, NA, 3, NA, 7.00001, NA, 7.001, NA, 7, NA)
scores_latvia_eu_emp <- c(8, NA, 7, NA, 7.5, NA, 7, NA, 8, NA)
scores_latvia_ec_emp <- c(7.00001, NA, 4, NA, 7.001, NA, NA, NA, 7.00001, NA)
scores_latvia_so_emp <- c(6.001, NA, NA, NA, 4, NA, NA, NA, 6, NA)

# Fit one Wordscores model per source (bl, ches, emp) and dimension
# (lr, eu, ec, so). No bl/eu model is fitted for Latvia.
ws_latvia_lr_bl <- textmodel_wordscores(latvia, y = scores_latvia_lr_bl)
ws_latvia_ec_bl <- textmodel_wordscores(latvia, y = scores_latvia_ec_bl)
ws_latvia_so_bl <- textmodel_wordscores(latvia, y = scores_latvia_so_bl)

ws_latvia_lr_ches <- textmodel_wordscores(latvia, y = scores_latvia_lr_ches)
ws_latvia_eu_ches <- textmodel_wordscores(latvia, y = scores_latvia_eu_ches)
ws_latvia_ec_ches <- textmodel_wordscores(latvia, y = scores_latvia_ec_ches)
ws_latvia_so_ches <- textmodel_wordscores(latvia, y = scores_latvia_so_ches)

ws_latvia_lr_emp <- textmodel_wordscores(latvia, y = scores_latvia_lr_emp)
ws_latvia_eu_emp <- textmodel_wordscores(latvia, y = scores_latvia_eu_emp)
ws_latvia_ec_emp <- textmodel_wordscores(latvia, y = scores_latvia_ec_emp)
ws_latvia_so_emp <- textmodel_wordscores(latvia, y = scores_latvia_so_emp)

# Predict document scores from `model` with the given rescaling and return
# them as a data frame whose first column is named `column`.
ws_column <- function(model, rescaling, column) {
  predicted <- as.data.frame(predict(model, rescaling = rescaling))
  names(predicted)[1] <- column
  predicted
}

# LBG-rescaled predictions. The bl/eu slot is an all-NA placeholder with one
# row per document, so the combined table keeps the same columns.
latvia_lr_bl_lbg <- ws_column(ws_latvia_lr_bl, "lbg", "bl_lr_lbg")
latvia_eu_bl_lbg <- data.frame(bl_eu_lbg = rep(NA, 10), stringsAsFactors = FALSE)
latvia_ec_bl_lbg <- ws_column(ws_latvia_ec_bl, "lbg", "bl_ec_lbg")
latvia_so_bl_lbg <- ws_column(ws_latvia_so_bl, "lbg", "bl_so_lbg")

latvia_lr_ches_lbg <- ws_column(ws_latvia_lr_ches, "lbg", "ches_lr_lbg")
latvia_eu_ches_lbg <- ws_column(ws_latvia_eu_ches, "lbg", "ches_eu_lbg")
latvia_ec_ches_lbg <- ws_column(ws_latvia_ec_ches, "lbg", "ches_ec_lbg")
latvia_so_ches_lbg <- ws_column(ws_latvia_so_ches, "lbg", "ches_so_lbg")

latvia_lr_emp_lbg <- ws_column(ws_latvia_lr_emp, "lbg", "emp_lr_lbg")
latvia_eu_emp_lbg <- ws_column(ws_latvia_eu_emp, "lbg", "emp_eu_lbg")
latvia_ec_emp_lbg <- ws_column(ws_latvia_ec_emp, "lbg", "emp_ec_lbg")
latvia_so_emp_lbg <- ws_column(ws_latvia_so_emp, "lbg", "emp_so_lbg")

# MV-rescaled predictions, same layout.
latvia_lr_bl_mv <- ws_column(ws_latvia_lr_bl, "mv", "bl_lr_mv")
latvia_eu_bl_mv <- data.frame(bl_eu_mv = rep(NA, 10), stringsAsFactors = FALSE)
latvia_ec_bl_mv <- ws_column(ws_latvia_ec_bl, "mv", "bl_ec_mv")
latvia_so_bl_mv <- ws_column(ws_latvia_so_bl, "mv", "bl_so_mv")

latvia_lr_ches_mv <- ws_column(ws_latvia_lr_ches, "mv", "ches_lr_mv")
latvia_eu_ches_mv <- ws_column(ws_latvia_eu_ches, "mv", "ches_eu_mv")
latvia_ec_ches_mv <- ws_column(ws_latvia_ec_ches, "mv", "ches_ec_mv")
latvia_so_ches_mv <- ws_column(ws_latvia_so_ches, "mv", "ches_so_mv")

latvia_lr_emp_mv <- ws_column(ws_latvia_lr_emp, "mv", "emp_lr_mv")
latvia_eu_emp_mv <- ws_column(ws_latvia_eu_emp, "mv", "emp_eu_mv")
latvia_ec_emp_mv <- ws_column(ws_latvia_ec_emp, "mv", "emp_ec_mv")
latvia_so_emp_mv <- ws_column(ws_latvia_so_emp, "mv", "emp_so_mv")

# Bind the 24 Latvian prediction frames (12 LBG-rescaled, then 12
# MV-rescaled) into one table and save it as CSV.
latvia_wordscores <- cbind(
  latvia_lr_bl_lbg, latvia_eu_bl_lbg, latvia_ec_bl_lbg, latvia_so_bl_lbg,
  latvia_lr_ches_lbg, latvia_eu_ches_lbg, latvia_ec_ches_lbg, latvia_so_ches_lbg,
  latvia_lr_emp_lbg, latvia_eu_emp_lbg, latvia_ec_emp_lbg, latvia_so_emp_lbg,
  latvia_lr_bl_mv, latvia_eu_bl_mv, latvia_ec_bl_mv, latvia_so_bl_mv,
  latvia_lr_ches_mv, latvia_eu_ches_mv, latvia_ec_ches_mv, latvia_so_ches_mv,
  latvia_lr_emp_mv, latvia_eu_emp_mv, latvia_ec_emp_mv, latvia_so_emp_mv
)
# Round trip through a matrix so that all columns, including the all-NA
# bl/eu placeholders, share one common type.
latvia_wordscores <- as.data.frame(as.matrix(latvia_wordscores))

# Write by full path instead of changing into the output folder, and create
# the folder first so the script does not stop here when it is missing.
wordscores_dir <- path.expand("~/Downloads/Replication Files/Wordscores")
dir.create(wordscores_dir, showWarnings = FALSE, recursive = TRUE)
write.csv(latvia_wordscores, file = file.path(wordscores_dir, "latvia_wordscores.csv"))

# Kept from the original: the following country sections read their files
# relative to the Manifestos folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Lithuania ################################################################################

# Extract the text of every Lithuanian manifesto PDF with pdf_text() and
# write it to a .txt file next to the PDF, one document at a time.

# 2004
LICS_04 <- pdf_text("Lithuania/LICS_04.pdf")
write(LICS_04, file = "Lithuania/LICS_04.txt")
LSDP_04 <- pdf_text("Lithuania/LSDP_04.pdf")
write(LSDP_04, file = "Lithuania/LSDP_04.txt")
TS_04 <- pdf_text("Lithuania/TS_04.pdf")
write(TS_04, file = "Lithuania/TS_04.txt")

# 2009
LICS_09 <- pdf_text("Lithuania/LICS_09.pdf")
write(LICS_09, file = "Lithuania/LICS_09.txt")
LSDP_09 <- pdf_text("Lithuania/LSDP_09.pdf")
write(LSDP_09, file = "Lithuania/LSDP_09.txt")
TS_09 <- pdf_text("Lithuania/TS_09.pdf")
write(TS_09, file = "Lithuania/TS_09.txt")

# Read every .txt file in the Lithuania folder into a corpus and build the
# raw document-feature matrix.
lithuania_texts <- readtext("Lithuania/*.txt")
lithuania_corpus <- corpus(lithuania_texts)
lithuania_dfm <- dfm(lithuania_corpus)
is.dfm(lithuania_dfm)

# Cleaning

# Clean a lower-cased copy; lithuania_dfm itself is left as built.
lithuania <- dfm_tolower(lithuania_dfm)

# Remove every feature that matches one of these character classes,
# one class per pass.
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  lithuania <- dfm_select(
    lithuania,
    pattern = char_class, selection = "remove",
    valuetype = "regex", verbose = TRUE
  )
}

# Remove Lithuanian stop words (stopwords-iso list), matched as fixed
# strings.
lithuania <- dfm_select(
  lithuania,
  pattern = stopwords(language = "lt", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Token and type counts per document. These are taken from the raw matrix
# (lithuania_dfm), not from the cleaned one.
lithuania_dfm_words <- as.data.frame(ntoken(lithuania_dfm))
lithuania_dfm_uniquewords <- as.data.frame(ntype(lithuania_dfm))


# Run Wordscores

# Lithuania

# Show the document order: the score vectors are matched to documents by
# position.
lithuania@Dimnames$docs
# Reference scores for the six Lithuanian documents, one vector per source
# (bl, ches, emp) and dimension (lr, eu, ec, so). Positions follow the
# document order of the dfm; NA marks a document without a reference score.
# Presumably the *_04 manifestos carry the scores and the *_09 ones are the
# texts to be scored - confirm against the printed document names.
scores_lithuania_lr_bl <- c(15.73684215546, NA, 6.684210300446, NA, 14.47365, NA)
scores_lithuania_eu_bl <- c(18.526316, NA, 16.6, NA, 18.736841, NA)
scores_lithuania_ec_bl <- c(16.631578, NA, 6.5263157, NA, 12.631579, NA)
scores_lithuania_so_bl <- c(6.4210525, NA, 10.1, NA, 15.842105, NA)

scores_lithuania_lr_ches <- c(5.599999904633, NA, 2.599999904633, NA, 8, NA)
scores_lithuania_eu_ches <- c(6.4, NA, 6.4, NA, 7, NA)
scores_lithuania_ec_ches <- c(6.25, NA, 4, NA, 7.25, NA)
scores_lithuania_so_ches <- c(4.1999998, NA, 6.1999998, NA, 5.5999999, NA)

scores_lithuania_lr_emp <- c(7, NA, 3, NA, 8, NA)
scores_lithuania_eu_emp <- c(9, NA, 7, NA, 8, NA)
scores_lithuania_ec_emp <- c(7, NA, 6, NA, 8, NA)
scores_lithuania_so_emp <- c(3, NA, 5, NA, 4, NA)

# Fit one Wordscores model per source (bl, ches, emp) and dimension
# (lr, eu, ec, so), each on the cleaned Lithuanian dfm with the matching
# reference score vector.
ws_lithuania_lr_bl <- textmodel_wordscores(lithuania, y = scores_lithuania_lr_bl)
ws_lithuania_eu_bl <- textmodel_wordscores(lithuania, y = scores_lithuania_eu_bl)
ws_lithuania_ec_bl <- textmodel_wordscores(lithuania, y = scores_lithuania_ec_bl)
ws_lithuania_so_bl <- textmodel_wordscores(lithuania, y = scores_lithuania_so_bl)

ws_lithuania_lr_ches <- textmodel_wordscores(lithuania, y = scores_lithuania_lr_ches)
ws_lithuania_eu_ches <- textmodel_wordscores(lithuania, y = scores_lithuania_eu_ches)
ws_lithuania_ec_ches <- textmodel_wordscores(lithuania, y = scores_lithuania_ec_ches)
ws_lithuania_so_ches <- textmodel_wordscores(lithuania, y = scores_lithuania_so_ches)

ws_lithuania_lr_emp <- textmodel_wordscores(lithuania, y = scores_lithuania_lr_emp)
ws_lithuania_eu_emp <- textmodel_wordscores(lithuania, y = scores_lithuania_eu_emp)
ws_lithuania_ec_emp <- textmodel_wordscores(lithuania, y = scores_lithuania_ec_emp)
ws_lithuania_so_emp <- textmodel_wordscores(lithuania, y = scores_lithuania_so_emp)


# Predict document scores from every fitted model under both rescalings
# ("lbg" and "mv"), keeping each result as a data frame. Statements are
# grouped per model; every call is independent of the others.

# bl benchmark
lithuania_lr_bl_lbg <- as.data.frame(predict(ws_lithuania_lr_bl, rescaling = "lbg"))
lithuania_lr_bl_mv <- as.data.frame(predict(ws_lithuania_lr_bl, rescaling = "mv"))
lithuania_eu_bl_lbg <- as.data.frame(predict(ws_lithuania_eu_bl, rescaling = "lbg"))
lithuania_eu_bl_mv <- as.data.frame(predict(ws_lithuania_eu_bl, rescaling = "mv"))
lithuania_ec_bl_lbg <- as.data.frame(predict(ws_lithuania_ec_bl, rescaling = "lbg"))
lithuania_ec_bl_mv <- as.data.frame(predict(ws_lithuania_ec_bl, rescaling = "mv"))
lithuania_so_bl_lbg <- as.data.frame(predict(ws_lithuania_so_bl, rescaling = "lbg"))
lithuania_so_bl_mv <- as.data.frame(predict(ws_lithuania_so_bl, rescaling = "mv"))

# ches benchmark
lithuania_lr_ches_lbg <- as.data.frame(predict(ws_lithuania_lr_ches, rescaling = "lbg"))
lithuania_lr_ches_mv <- as.data.frame(predict(ws_lithuania_lr_ches, rescaling = "mv"))
lithuania_eu_ches_lbg <- as.data.frame(predict(ws_lithuania_eu_ches, rescaling = "lbg"))
lithuania_eu_ches_mv <- as.data.frame(predict(ws_lithuania_eu_ches, rescaling = "mv"))
lithuania_ec_ches_lbg <- as.data.frame(predict(ws_lithuania_ec_ches, rescaling = "lbg"))
lithuania_ec_ches_mv <- as.data.frame(predict(ws_lithuania_ec_ches, rescaling = "mv"))
lithuania_so_ches_lbg <- as.data.frame(predict(ws_lithuania_so_ches, rescaling = "lbg"))
lithuania_so_ches_mv <- as.data.frame(predict(ws_lithuania_so_ches, rescaling = "mv"))

# emp benchmark
lithuania_lr_emp_lbg <- as.data.frame(predict(ws_lithuania_lr_emp, rescaling = "lbg"))
lithuania_lr_emp_mv <- as.data.frame(predict(ws_lithuania_lr_emp, rescaling = "mv"))
lithuania_eu_emp_lbg <- as.data.frame(predict(ws_lithuania_eu_emp, rescaling = "lbg"))
lithuania_eu_emp_mv <- as.data.frame(predict(ws_lithuania_eu_emp, rescaling = "mv"))
lithuania_ec_emp_lbg <- as.data.frame(predict(ws_lithuania_ec_emp, rescaling = "lbg"))
lithuania_ec_emp_mv <- as.data.frame(predict(ws_lithuania_ec_emp, rescaling = "mv"))
lithuania_so_emp_lbg <- as.data.frame(predict(ws_lithuania_so_emp, rescaling = "lbg"))
lithuania_so_emp_mv <- as.data.frame(predict(ws_lithuania_so_emp, rescaling = "mv"))

# Give the first column of each prediction table a country-independent label
# of the form <benchmark>_<dimension>_<rescaling>.

# lbg rescaling
colnames(lithuania_lr_bl_lbg)[1] <- "bl_lr_lbg"
colnames(lithuania_eu_bl_lbg)[1] <- "bl_eu_lbg"
colnames(lithuania_ec_bl_lbg)[1] <- "bl_ec_lbg"
colnames(lithuania_so_bl_lbg)[1] <- "bl_so_lbg"

colnames(lithuania_lr_ches_lbg)[1] <- "ches_lr_lbg"
colnames(lithuania_eu_ches_lbg)[1] <- "ches_eu_lbg"
colnames(lithuania_ec_ches_lbg)[1] <- "ches_ec_lbg"
colnames(lithuania_so_ches_lbg)[1] <- "ches_so_lbg"

colnames(lithuania_lr_emp_lbg)[1] <- "emp_lr_lbg"
colnames(lithuania_eu_emp_lbg)[1] <- "emp_eu_lbg"
colnames(lithuania_ec_emp_lbg)[1] <- "emp_ec_lbg"
colnames(lithuania_so_emp_lbg)[1] <- "emp_so_lbg"

# mv rescaling
colnames(lithuania_lr_bl_mv)[1] <- "bl_lr_mv"
colnames(lithuania_eu_bl_mv)[1] <- "bl_eu_mv"
colnames(lithuania_ec_bl_mv)[1] <- "bl_ec_mv"
colnames(lithuania_so_bl_mv)[1] <- "bl_so_mv"

colnames(lithuania_lr_ches_mv)[1] <- "ches_lr_mv"
colnames(lithuania_eu_ches_mv)[1] <- "ches_eu_mv"
colnames(lithuania_ec_ches_mv)[1] <- "ches_ec_mv"
colnames(lithuania_so_ches_mv)[1] <- "ches_so_mv"

colnames(lithuania_lr_emp_mv)[1] <- "emp_lr_mv"
colnames(lithuania_eu_emp_mv)[1] <- "emp_eu_mv"
colnames(lithuania_ec_emp_mv)[1] <- "emp_ec_mv"
colnames(lithuania_so_emp_mv)[1] <- "emp_so_mv"

# Assemble the 24 Lithuanian prediction tables into one table. The column
# order defines the CSV layout and is therefore left unchanged.
lithuania_wordscores <- cbind(
  lithuania_lr_bl_lbg, lithuania_eu_bl_lbg, lithuania_ec_bl_lbg, lithuania_so_bl_lbg,
  lithuania_lr_ches_lbg, lithuania_eu_ches_lbg, lithuania_ec_ches_lbg, lithuania_so_ches_lbg,
  lithuania_lr_emp_lbg, lithuania_eu_emp_lbg, lithuania_ec_emp_lbg, lithuania_so_emp_lbg,
  lithuania_lr_bl_mv, lithuania_eu_bl_mv, lithuania_ec_bl_mv, lithuania_so_bl_mv,
  lithuania_lr_ches_mv, lithuania_eu_ches_mv, lithuania_ec_ches_mv, lithuania_so_ches_mv,
  lithuania_lr_emp_mv, lithuania_eu_emp_mv, lithuania_ec_emp_mv, lithuania_so_emp_mv
)
# Round-trip through a matrix so that all columns share a single type.
lithuania_wordscores <- as.matrix(lithuania_wordscores)
lithuania_wordscores <- as.data.frame(lithuania_wordscores)

# Write to the output folder through an explicit path instead of switching
# the working directory there and back. The folder is created on first use,
# so the export no longer fails when "Wordscores" does not exist yet.
dir.create("~/Downloads/Replication Files/Wordscores", showWarnings = FALSE, recursive = TRUE)
write.csv(
  lithuania_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "lithuania_wordscores.csv")
)
# Kept: the manifesto paths used further down are relative to this folder.
setwd("~/Downloads/Replication Files/Manifestos/")

################################################################################## Northern Ireland ################################################################################

# UUP 2004 goes through OCR: pdf_convert() renders the PDF pages to image
# files, which ocr() then reads with the English Tesseract engine.
UUP_04 <- ocr(pdf_convert("Northern Ireland/UUP_04.pdf"), engine = "eng")
write(UUP_04, file = "Northern Ireland/UUP_04.txt")

# All other manifestos are read with pdf_text(); each one is saved as a .txt
# file next to its PDF right after extraction.

# 2004
DUP_04 <- pdf_text("Northern Ireland/DUP_04.pdf")
write(DUP_04, file = "Northern Ireland/DUP_04.txt")
SDLP_04 <- pdf_text("Northern Ireland/SDLP_04.pdf")
write(SDLP_04, file = "Northern Ireland/SDLP_04.txt")
SF_04 <- pdf_text("Northern Ireland/SF_04.pdf")
write(SF_04, file = "Northern Ireland/SF_04.txt")

# 2009
DUP_09 <- pdf_text("Northern Ireland/DUP_09.pdf")
write(DUP_09, file = "Northern Ireland/DUP_09.txt")
SDLP_09 <- pdf_text("Northern Ireland/SDLP_09.pdf")
write(SDLP_09, file = "Northern Ireland/SDLP_09.txt")
SF_09 <- pdf_text("Northern Ireland/SF_09.pdf")
write(SF_09, file = "Northern Ireland/SF_09.txt")
UUP_09 <- pdf_text("Northern Ireland/UUP_09.pdf")
write(UUP_09, file = "Northern Ireland/UUP_09.txt")

# Read the saved .txt manifestos back in and build a document-feature matrix.
# The raw dfm is kept under its own name for the size counts further down.
northernireland_texts <- readtext("Northern Ireland/*.txt")
northernireland_corpus <- corpus(northernireland_texts)
northernireland_dfm <- dfm(northernireland_corpus)
is.dfm(northernireland_dfm)

# Cleaning

# Start from a lower-cased copy of the raw dfm, then drop features matching
# each POSIX character class in turn (same order and options throughout).
northernireland <- Reduce(
  function(acc, char_class) {
    dfm_select(acc, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(northernireland_dfm)
)

# Remove English stopwords (stopwords-iso list), matched as fixed strings.
northernireland <- dfm_select(
  northernireland,
  stopwords(language = "en", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
northernireland_dfm_words <- as.data.frame(ntoken(northernireland_dfm))
northernireland_dfm_uniquewords <- as.data.frame(ntype(northernireland_dfm))

# Run Wordscores

# Northern Ireland

# Show the document order; the score vectors that follow are positional.
northernireland@Dimnames$docs
# Reference scores for Northern Ireland (bl and emp benchmarks only; no ches
# vectors are defined for this case). Entries are positional, one per
# document in the dfm; NA means no reference score is supplied.
# NOTE(review): presumably the documents are in file-name order (DUP_04,
# DUP_09, SDLP_04, ...) - confirm against the docnames printed just before.
scores_northernireland_lr_bl <- c(12.7, NA, 8.5, NA, 6.3, NA, 13.8, NA)
scores_northernireland_eu_bl <- c(3.6, NA, 16.5, NA, 7.8, NA, 7.2, NA)
scores_northernireland_ec_bl <- c(7.5, NA, 6.5, NA, 4.8, NA, 12.2, NA)
scores_northernireland_so_bl <- c(18.8, NA, 12.7, NA, 9.1, NA, 13.5, NA)

scores_northernireland_lr_emp <- c(6, NA, 5, NA, 3, NA, 7, NA)
scores_northernireland_eu_emp <- c(2, NA, 9, NA, 3, NA, 4, NA)
scores_northernireland_ec_emp <- c(3, NA, 4, NA, 3, NA, 7, NA)
scores_northernireland_so_emp <- c(7, NA, 6, NA, 3, NA, 5, NA)

# Fit one Wordscores model per benchmark/dimension pair on the cleaned
# Northern Ireland dfm, using the matching reference-score vector.

# bl benchmark
ws_northernireland_lr_bl <- textmodel_wordscores(x = northernireland, y = scores_northernireland_lr_bl)
ws_northernireland_eu_bl <- textmodel_wordscores(x = northernireland, y = scores_northernireland_eu_bl)
ws_northernireland_ec_bl <- textmodel_wordscores(x = northernireland, y = scores_northernireland_ec_bl)
ws_northernireland_so_bl <- textmodel_wordscores(x = northernireland, y = scores_northernireland_so_bl)

# emp benchmark
ws_northernireland_lr_emp <- textmodel_wordscores(x = northernireland, y = scores_northernireland_lr_emp)
ws_northernireland_eu_emp <- textmodel_wordscores(x = northernireland, y = scores_northernireland_eu_emp)
ws_northernireland_ec_emp <- textmodel_wordscores(x = northernireland, y = scores_northernireland_ec_emp)
ws_northernireland_so_emp <- textmodel_wordscores(x = northernireland, y = scores_northernireland_so_emp)


# Predict document scores from every fitted model under both rescalings
# ("lbg" and "mv"), keeping each result as a data frame. Statements are
# grouped per model; every call is independent of the others.

# bl benchmark
northernireland_lr_bl_lbg <- as.data.frame(predict(ws_northernireland_lr_bl, rescaling = "lbg"))
northernireland_lr_bl_mv <- as.data.frame(predict(ws_northernireland_lr_bl, rescaling = "mv"))
northernireland_eu_bl_lbg <- as.data.frame(predict(ws_northernireland_eu_bl, rescaling = "lbg"))
northernireland_eu_bl_mv <- as.data.frame(predict(ws_northernireland_eu_bl, rescaling = "mv"))
northernireland_ec_bl_lbg <- as.data.frame(predict(ws_northernireland_ec_bl, rescaling = "lbg"))
northernireland_ec_bl_mv <- as.data.frame(predict(ws_northernireland_ec_bl, rescaling = "mv"))
northernireland_so_bl_lbg <- as.data.frame(predict(ws_northernireland_so_bl, rescaling = "lbg"))
northernireland_so_bl_mv <- as.data.frame(predict(ws_northernireland_so_bl, rescaling = "mv"))

# ches benchmark: no model is fitted for it here, so eight-row all-NA
# placeholders keep the combined table at the same 24 columns.
northernireland_lr_ches_lbg <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_lr_ches_mv <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_eu_ches_lbg <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_eu_ches_mv <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_ec_ches_lbg <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_ec_ches_mv <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_so_ches_lbg <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
northernireland_so_ches_mv <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)

# emp benchmark
northernireland_lr_emp_lbg <- as.data.frame(predict(ws_northernireland_lr_emp, rescaling = "lbg"))
northernireland_lr_emp_mv <- as.data.frame(predict(ws_northernireland_lr_emp, rescaling = "mv"))
northernireland_eu_emp_lbg <- as.data.frame(predict(ws_northernireland_eu_emp, rescaling = "lbg"))
northernireland_eu_emp_mv <- as.data.frame(predict(ws_northernireland_eu_emp, rescaling = "mv"))
northernireland_ec_emp_lbg <- as.data.frame(predict(ws_northernireland_ec_emp, rescaling = "lbg"))
northernireland_ec_emp_mv <- as.data.frame(predict(ws_northernireland_ec_emp, rescaling = "mv"))
northernireland_so_emp_lbg <- as.data.frame(predict(ws_northernireland_so_emp, rescaling = "lbg"))
northernireland_so_emp_mv <- as.data.frame(predict(ws_northernireland_so_emp, rescaling = "mv"))

# Give the first column of each prediction table a country-independent label
# of the form <benchmark>_<dimension>_<rescaling>.

# lbg rescaling
colnames(northernireland_lr_bl_lbg)[1] <- "bl_lr_lbg"
colnames(northernireland_eu_bl_lbg)[1] <- "bl_eu_lbg"
colnames(northernireland_ec_bl_lbg)[1] <- "bl_ec_lbg"
colnames(northernireland_so_bl_lbg)[1] <- "bl_so_lbg"

colnames(northernireland_lr_ches_lbg)[1] <- "ches_lr_lbg"
colnames(northernireland_eu_ches_lbg)[1] <- "ches_eu_lbg"
colnames(northernireland_ec_ches_lbg)[1] <- "ches_ec_lbg"
colnames(northernireland_so_ches_lbg)[1] <- "ches_so_lbg"

colnames(northernireland_lr_emp_lbg)[1] <- "emp_lr_lbg"
colnames(northernireland_eu_emp_lbg)[1] <- "emp_eu_lbg"
colnames(northernireland_ec_emp_lbg)[1] <- "emp_ec_lbg"
colnames(northernireland_so_emp_lbg)[1] <- "emp_so_lbg"

# mv rescaling
colnames(northernireland_lr_bl_mv)[1] <- "bl_lr_mv"
colnames(northernireland_eu_bl_mv)[1] <- "bl_eu_mv"
colnames(northernireland_ec_bl_mv)[1] <- "bl_ec_mv"
colnames(northernireland_so_bl_mv)[1] <- "bl_so_mv"

colnames(northernireland_lr_ches_mv)[1] <- "ches_lr_mv"
colnames(northernireland_eu_ches_mv)[1] <- "ches_eu_mv"
colnames(northernireland_ec_ches_mv)[1] <- "ches_ec_mv"
colnames(northernireland_so_ches_mv)[1] <- "ches_so_mv"

colnames(northernireland_lr_emp_mv)[1] <- "emp_lr_mv"
colnames(northernireland_eu_emp_mv)[1] <- "emp_eu_mv"
colnames(northernireland_ec_emp_mv)[1] <- "emp_ec_mv"
colnames(northernireland_so_emp_mv)[1] <- "emp_so_mv"

# Assemble the 24 Northern Ireland tables (predictions plus the all-NA ches
# placeholders) into one table. The column order defines the CSV layout and
# is therefore left unchanged.
northernireland_wordscores <- cbind(
  northernireland_lr_bl_lbg, northernireland_eu_bl_lbg, northernireland_ec_bl_lbg, northernireland_so_bl_lbg,
  northernireland_lr_ches_lbg, northernireland_eu_ches_lbg, northernireland_ec_ches_lbg, northernireland_so_ches_lbg,
  northernireland_lr_emp_lbg, northernireland_eu_emp_lbg, northernireland_ec_emp_lbg, northernireland_so_emp_lbg,
  northernireland_lr_bl_mv, northernireland_eu_bl_mv, northernireland_ec_bl_mv, northernireland_so_bl_mv,
  northernireland_lr_ches_mv, northernireland_eu_ches_mv, northernireland_ec_ches_mv, northernireland_so_ches_mv,
  northernireland_lr_emp_mv, northernireland_eu_emp_mv, northernireland_ec_emp_mv, northernireland_so_emp_mv
)
# Round-trip through a matrix so that all columns share a single type.
northernireland_wordscores <- as.matrix(northernireland_wordscores)
northernireland_wordscores <- as.data.frame(northernireland_wordscores)

# Write to the output folder through an explicit path instead of switching
# the working directory there and back. The folder is created on first use,
# so the export no longer fails when "Wordscores" does not exist yet.
dir.create("~/Downloads/Replication Files/Wordscores", showWarnings = FALSE, recursive = TRUE)
write.csv(
  northernireland_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "northernireland_wordscores.csv")
)
# Kept: the manifesto paths used further down are relative to this folder.
setwd("~/Downloads/Replication Files/Manifestos/")



################################################################################## Netherlands ################################################################################

# Extract the text of every Dutch manifesto with pdf_text() and save it as a
# .txt file next to its PDF right after extraction.

# 2004
CDA_04 <- pdf_text("Netherlands/CDA_04.pdf")
write(CDA_04, file = "Netherlands/CDA_04.txt")
CUSGP_04 <- pdf_text("Netherlands/CUSGP_04.pdf")
write(CUSGP_04, file = "Netherlands/CUSGP_04.txt")
D66_04 <- pdf_text("Netherlands/D66_04.pdf")
write(D66_04, file = "Netherlands/D66_04.txt")
GL_04 <- pdf_text("Netherlands/GL_04.pdf")
write(GL_04, file = "Netherlands/GL_04.txt")
PVDA_04 <- pdf_text("Netherlands/PVDA_04.pdf")
write(PVDA_04, file = "Netherlands/PVDA_04.txt")
SP_04 <- pdf_text("Netherlands/SP_04.pdf")
write(SP_04, file = "Netherlands/SP_04.txt")
VVD_04 <- pdf_text("Netherlands/VVD_04.pdf")
write(VVD_04, file = "Netherlands/VVD_04.txt")

# 2009
CDA_09 <- pdf_text("Netherlands/CDA_09.pdf")
write(CDA_09, file = "Netherlands/CDA_09.txt")
CUSGP_09 <- pdf_text("Netherlands/CUSGP_09.pdf")
write(CUSGP_09, file = "Netherlands/CUSGP_09.txt")
D66_09 <- pdf_text("Netherlands/D66_09.pdf")
write(D66_09, file = "Netherlands/D66_09.txt")
GL_09 <- pdf_text("Netherlands/GL_09.pdf")
write(GL_09, file = "Netherlands/GL_09.txt")
PVDA_09 <- pdf_text("Netherlands/PVDA_09.pdf")
write(PVDA_09, file = "Netherlands/PVDA_09.txt")
SP_09 <- pdf_text("Netherlands/SP_09.pdf")
write(SP_09, file = "Netherlands/SP_09.txt")
VVD_09 <- pdf_text("Netherlands/VVD_09.pdf")
write(VVD_09, file = "Netherlands/VVD_09.txt")

# Read the saved .txt manifestos back in and build a document-feature matrix.
# The raw dfm is kept under its own name for the size counts further down.
netherlands_texts <- readtext("Netherlands/*.txt")
netherlands_corpus <- corpus(netherlands_texts)
netherlands_dfm <- dfm(netherlands_corpus)
is.dfm(netherlands_dfm)

# Cleaning

# Start from a lower-cased copy of the raw dfm, then drop features matching
# each POSIX character class in turn (same order and options throughout).
netherlands <- Reduce(
  function(acc, char_class) {
    dfm_select(acc, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(netherlands_dfm)
)

# Remove Dutch stopwords (stopwords-iso list), matched as fixed strings.
netherlands <- dfm_select(
  netherlands,
  stopwords(language = "nl", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
netherlands_dfm_words <- as.data.frame(ntoken(netherlands_dfm))
netherlands_dfm_uniquewords <- as.data.frame(ntype(netherlands_dfm))

# Run Wordscores

# Netherlands

# Show the document order; the score vectors that follow are positional.
netherlands@Dimnames$docs
# Reference scores for the Netherlands: one vector per benchmark (bl, ches,
# emp) and dimension (lr, eu, ec, so). Entries are positional, one per
# document in the dfm; NA means no reference score is supplied.
# NOTE(review): presumably the documents are in file-name order (CDA_04,
# CDA_09, CUSGP_04, ...) - confirm against the docnames printed just before.
scores_netherlands_lr_bl <- c(13.57142829895, NA, 14.35715, NA, 10.38095283508, NA, 4.952381134033, NA, 8.57142829895, NA, 3.095238208771, NA, 16.33333396912, NA)
scores_netherlands_eu_bl <- c(11.3, NA, 7.3571501, NA, 13.9, NA, 11.473684, NA, 13.526316, NA, 5.4705901, NA, 8.3999996, NA)
scores_netherlands_ec_bl <- c(13.272727, NA, 11.373355, NA, 10, NA, 5.090909, NA, 8.090909, NA, 3.6190476, NA, 16.772728, NA)
scores_netherlands_so_bl <- c(13.227273, NA, 18.476177, NA, 2.590909, NA, 2.5714285, NA, 5.2272725, NA, 7.25, NA, 4.7727275, NA)

scores_netherlands_lr_ches <- c(6.130000114441, NA, 7.31500005, NA, 4.630000114441, NA, 2.5, NA, 4, NA, 1.629999995232, NA, 7.380000114441, NA)
scores_netherlands_eu_ches <- c(5.780000209808, NA, 4.1099999, NA, 6.44000005722, NA, 5.380000114441, NA, 6.110000133514, NA, 3, NA, 4.44000005722, NA)
scores_netherlands_ec_ches <- c(6.2199998, NA, 5.915, NA, 5.1100001, NA, 1.89, NA, 3.78, NA, 0.77999997, NA, 8, NA)
scores_netherlands_so_ches <- c(6.8899999, NA, 9.0799999, NA, 1.4400001, NA, 1.45, NA, 3.25, NA, 3.3299999, NA, 5.5599999, NA)

scores_netherlands_lr_emp <- c(6, NA, 6, NA, 6, NA, 3, NA, 4, NA, 2, NA, 7, NA)
scores_netherlands_eu_emp <- c(9, NA, 5, NA, 9.01, NA, 8.99, NA, 8, NA, 4, NA, 9.02, NA)
scores_netherlands_ec_emp <- c(8.01, NA, 7, NA, 8.001, NA, 3, NA, 4, NA, 2, NA, 8, NA)
scores_netherlands_so_emp <- c(5, NA, 7, NA, 3, NA, 4, NA, 6.99, NA, 7.01, NA, 2, NA)

# Fit one Wordscores model per benchmark/dimension pair on the cleaned Dutch
# dfm, using the matching reference-score vector.

# bl benchmark
ws_netherlands_lr_bl <- textmodel_wordscores(x = netherlands, y = scores_netherlands_lr_bl)
ws_netherlands_eu_bl <- textmodel_wordscores(x = netherlands, y = scores_netherlands_eu_bl)
ws_netherlands_ec_bl <- textmodel_wordscores(x = netherlands, y = scores_netherlands_ec_bl)
ws_netherlands_so_bl <- textmodel_wordscores(x = netherlands, y = scores_netherlands_so_bl)

# ches benchmark
ws_netherlands_lr_ches <- textmodel_wordscores(x = netherlands, y = scores_netherlands_lr_ches)
ws_netherlands_eu_ches <- textmodel_wordscores(x = netherlands, y = scores_netherlands_eu_ches)
ws_netherlands_ec_ches <- textmodel_wordscores(x = netherlands, y = scores_netherlands_ec_ches)
ws_netherlands_so_ches <- textmodel_wordscores(x = netherlands, y = scores_netherlands_so_ches)

# emp benchmark
ws_netherlands_lr_emp <- textmodel_wordscores(x = netherlands, y = scores_netherlands_lr_emp)
ws_netherlands_eu_emp <- textmodel_wordscores(x = netherlands, y = scores_netherlands_eu_emp)
ws_netherlands_ec_emp <- textmodel_wordscores(x = netherlands, y = scores_netherlands_ec_emp)
ws_netherlands_so_emp <- textmodel_wordscores(x = netherlands, y = scores_netherlands_so_emp)


# Predict document scores from every fitted model under both rescalings
# ("lbg" and "mv"), keeping each result as a data frame. Statements are
# grouped per model; every call is independent of the others.

# bl benchmark
netherlands_lr_bl_lbg <- as.data.frame(predict(ws_netherlands_lr_bl, rescaling = "lbg"))
netherlands_lr_bl_mv <- as.data.frame(predict(ws_netherlands_lr_bl, rescaling = "mv"))
netherlands_eu_bl_lbg <- as.data.frame(predict(ws_netherlands_eu_bl, rescaling = "lbg"))
netherlands_eu_bl_mv <- as.data.frame(predict(ws_netherlands_eu_bl, rescaling = "mv"))
netherlands_ec_bl_lbg <- as.data.frame(predict(ws_netherlands_ec_bl, rescaling = "lbg"))
netherlands_ec_bl_mv <- as.data.frame(predict(ws_netherlands_ec_bl, rescaling = "mv"))
netherlands_so_bl_lbg <- as.data.frame(predict(ws_netherlands_so_bl, rescaling = "lbg"))
netherlands_so_bl_mv <- as.data.frame(predict(ws_netherlands_so_bl, rescaling = "mv"))

# ches benchmark
netherlands_lr_ches_lbg <- as.data.frame(predict(ws_netherlands_lr_ches, rescaling = "lbg"))
netherlands_lr_ches_mv <- as.data.frame(predict(ws_netherlands_lr_ches, rescaling = "mv"))
netherlands_eu_ches_lbg <- as.data.frame(predict(ws_netherlands_eu_ches, rescaling = "lbg"))
netherlands_eu_ches_mv <- as.data.frame(predict(ws_netherlands_eu_ches, rescaling = "mv"))
netherlands_ec_ches_lbg <- as.data.frame(predict(ws_netherlands_ec_ches, rescaling = "lbg"))
netherlands_ec_ches_mv <- as.data.frame(predict(ws_netherlands_ec_ches, rescaling = "mv"))
netherlands_so_ches_lbg <- as.data.frame(predict(ws_netherlands_so_ches, rescaling = "lbg"))
netherlands_so_ches_mv <- as.data.frame(predict(ws_netherlands_so_ches, rescaling = "mv"))

# emp benchmark
netherlands_lr_emp_lbg <- as.data.frame(predict(ws_netherlands_lr_emp, rescaling = "lbg"))
netherlands_lr_emp_mv <- as.data.frame(predict(ws_netherlands_lr_emp, rescaling = "mv"))
netherlands_eu_emp_lbg <- as.data.frame(predict(ws_netherlands_eu_emp, rescaling = "lbg"))
netherlands_eu_emp_mv <- as.data.frame(predict(ws_netherlands_eu_emp, rescaling = "mv"))
netherlands_ec_emp_lbg <- as.data.frame(predict(ws_netherlands_ec_emp, rescaling = "lbg"))
netherlands_ec_emp_mv <- as.data.frame(predict(ws_netherlands_ec_emp, rescaling = "mv"))
netherlands_so_emp_lbg <- as.data.frame(predict(ws_netherlands_so_emp, rescaling = "lbg"))
netherlands_so_emp_mv <- as.data.frame(predict(ws_netherlands_so_emp, rescaling = "mv"))

# Give the first column of each prediction table a country-independent label
# of the form <benchmark>_<dimension>_<rescaling>.

# lbg rescaling
colnames(netherlands_lr_bl_lbg)[1] <- "bl_lr_lbg"
colnames(netherlands_eu_bl_lbg)[1] <- "bl_eu_lbg"
colnames(netherlands_ec_bl_lbg)[1] <- "bl_ec_lbg"
colnames(netherlands_so_bl_lbg)[1] <- "bl_so_lbg"

colnames(netherlands_lr_ches_lbg)[1] <- "ches_lr_lbg"
colnames(netherlands_eu_ches_lbg)[1] <- "ches_eu_lbg"
colnames(netherlands_ec_ches_lbg)[1] <- "ches_ec_lbg"
colnames(netherlands_so_ches_lbg)[1] <- "ches_so_lbg"

colnames(netherlands_lr_emp_lbg)[1] <- "emp_lr_lbg"
colnames(netherlands_eu_emp_lbg)[1] <- "emp_eu_lbg"
colnames(netherlands_ec_emp_lbg)[1] <- "emp_ec_lbg"
colnames(netherlands_so_emp_lbg)[1] <- "emp_so_lbg"

# mv rescaling
colnames(netherlands_lr_bl_mv)[1] <- "bl_lr_mv"
colnames(netherlands_eu_bl_mv)[1] <- "bl_eu_mv"
colnames(netherlands_ec_bl_mv)[1] <- "bl_ec_mv"
colnames(netherlands_so_bl_mv)[1] <- "bl_so_mv"

colnames(netherlands_lr_ches_mv)[1] <- "ches_lr_mv"
colnames(netherlands_eu_ches_mv)[1] <- "ches_eu_mv"
colnames(netherlands_ec_ches_mv)[1] <- "ches_ec_mv"
colnames(netherlands_so_ches_mv)[1] <- "ches_so_mv"

colnames(netherlands_lr_emp_mv)[1] <- "emp_lr_mv"
colnames(netherlands_eu_emp_mv)[1] <- "emp_eu_mv"
colnames(netherlands_ec_emp_mv)[1] <- "emp_ec_mv"
colnames(netherlands_so_emp_mv)[1] <- "emp_so_mv"

# Assemble the 24 Dutch prediction tables into one table. The column order
# defines the CSV layout and is therefore left unchanged.
netherlands_wordscores <- cbind(
  netherlands_lr_bl_lbg, netherlands_eu_bl_lbg, netherlands_ec_bl_lbg, netherlands_so_bl_lbg,
  netherlands_lr_ches_lbg, netherlands_eu_ches_lbg, netherlands_ec_ches_lbg, netherlands_so_ches_lbg,
  netherlands_lr_emp_lbg, netherlands_eu_emp_lbg, netherlands_ec_emp_lbg, netherlands_so_emp_lbg,
  netherlands_lr_bl_mv, netherlands_eu_bl_mv, netherlands_ec_bl_mv, netherlands_so_bl_mv,
  netherlands_lr_ches_mv, netherlands_eu_ches_mv, netherlands_ec_ches_mv, netherlands_so_ches_mv,
  netherlands_lr_emp_mv, netherlands_eu_emp_mv, netherlands_ec_emp_mv, netherlands_so_emp_mv
)
# Round-trip through a matrix so that all columns share a single type.
netherlands_wordscores <- as.matrix(netherlands_wordscores)
netherlands_wordscores <- as.data.frame(netherlands_wordscores)

# Write to the output folder through an explicit path instead of switching
# the working directory there and back. The folder is created on first use,
# so the export no longer fails when "Wordscores" does not exist yet.
dir.create("~/Downloads/Replication Files/Wordscores", showWarnings = FALSE, recursive = TRUE)
write.csv(
  netherlands_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "netherlands_wordscores.csv")
)
# Kept: the manifesto paths used further down are relative to this folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Poland ################################################################################

# Extract the text of every Polish manifesto with pdf_text() and save it as a
# .txt file next to its PDF right after extraction.

# 2004
PIS_04 <- pdf_text("Poland/PIS_04.pdf")
write(PIS_04, file = "Poland/PIS_04.txt")
PO_04 <- pdf_text("Poland/PO_04.pdf")
write(PO_04, file = "Poland/PO_04.txt")
PSL_04 <- pdf_text("Poland/PSL_04.pdf")
write(PSL_04, file = "Poland/PSL_04.txt")
SLDUP_04 <- pdf_text("Poland/SLDUP_04.pdf")
write(SLDUP_04, file = "Poland/SLDUP_04.txt")

# 2009
PIS_09 <- pdf_text("Poland/PIS_09.pdf")
write(PIS_09, file = "Poland/PIS_09.txt")
PO_09 <- pdf_text("Poland/PO_09.pdf")
write(PO_09, file = "Poland/PO_09.txt")
PSL_09 <- pdf_text("Poland/PSL_09.pdf")
write(PSL_09, file = "Poland/PSL_09.txt")
SLDUP_09 <- pdf_text("Poland/SLDUP_09.pdf")
write(SLDUP_09, file = "Poland/SLDUP_09.txt")

# Read the saved .txt manifestos back in and build a document-feature matrix.
# The raw dfm is kept under its own name for the size counts further down.
poland_texts <- readtext("Poland/*.txt")
poland_corpus <- corpus(poland_texts)
poland_dfm <- dfm(poland_corpus)
is.dfm(poland_dfm)

# Cleaning

# Start from a lower-cased copy of the raw dfm, then drop features matching
# each POSIX character class in turn (same order and options throughout).
poland <- Reduce(
  function(acc, char_class) {
    dfm_select(acc, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(poland_dfm)
)

# Remove Polish stopwords (stopwords-iso list), matched as fixed strings.
poland <- dfm_select(
  poland,
  stopwords(language = "pl", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
poland_dfm_words <- as.data.frame(ntoken(poland_dfm))
poland_dfm_uniquewords <- as.data.frame(ntype(poland_dfm))

# Run Wordscores

# Poland

# Show the document order; the score vectors that follow are positional.
poland@Dimnames$docs
# Reference scores for Poland. Entries are positional, one per document in
# the dfm; NA means no reference score is supplied. There is no eu vector for
# the bl benchmark, and so_emp has an NA in its fifth slot as well.
# NOTE(review): presumably the documents are in file-name order (PIS_04,
# PIS_09, PO_04, ...) - confirm against the docnames printed just before.
scores_poland_lr_bl <- c(15.41935443878, NA, 13.41935443878, NA, 8.655172348022, NA, 6.032258033752, NA)
scores_poland_ec_bl <- c(11.46875, NA, 16.5, NA, 6.0967741, NA, 6, NA)
scores_poland_so_bl <- c(15.125, NA, 8.6774197, NA, 14.066667, NA, 5.1612902, NA)

scores_poland_lr_ches <- c(7.75, NA, 6.880000114441, NA, 4.380000114441, NA, 4.130000114441, NA)
scores_poland_eu_ches <- c(4.75, NA, 6.75, NA, 4.130000114441, NA, 6.880000114441, NA)
scores_poland_ec_ches <- c(4.6300001, NA, 8.3800001, NA, 2.5, NA, 4.25, NA)
scores_poland_so_ches <- c(7.75, NA, 4.3800001, NA, 7.6300001, NA, 1.88, NA)

scores_poland_lr_emp <- c(8, NA, 7, NA, 3, NA, 2, NA)
scores_poland_eu_emp <- c(6, NA, 9, NA, 7, NA, 9, NA)
scores_poland_ec_emp <- c(5, NA, 9, NA, 4, NA, 5, NA)
scores_poland_so_emp <- c(6, NA, 5, NA, NA, NA, 3, NA)

# Fit one Wordscores model per available benchmark/dimension pair on the
# cleaned Polish dfm, using the matching reference-score vector.

# bl benchmark (no eu model)
ws_poland_lr_bl <- textmodel_wordscores(x = poland, y = scores_poland_lr_bl)
ws_poland_ec_bl <- textmodel_wordscores(x = poland, y = scores_poland_ec_bl)
ws_poland_so_bl <- textmodel_wordscores(x = poland, y = scores_poland_so_bl)

# ches benchmark
ws_poland_lr_ches <- textmodel_wordscores(x = poland, y = scores_poland_lr_ches)
ws_poland_eu_ches <- textmodel_wordscores(x = poland, y = scores_poland_eu_ches)
ws_poland_ec_ches <- textmodel_wordscores(x = poland, y = scores_poland_ec_ches)
ws_poland_so_ches <- textmodel_wordscores(x = poland, y = scores_poland_so_ches)

# emp benchmark
ws_poland_lr_emp <- textmodel_wordscores(x = poland, y = scores_poland_lr_emp)
ws_poland_eu_emp <- textmodel_wordscores(x = poland, y = scores_poland_eu_emp)
ws_poland_ec_emp <- textmodel_wordscores(x = poland, y = scores_poland_ec_emp)
ws_poland_so_emp <- textmodel_wordscores(x = poland, y = scores_poland_so_emp)


# Predict document scores from every fitted model under both rescalings
# ("lbg" and "mv"), keeping each result as a data frame. Statements are
# grouped per model; every call is independent of the others.

# bl benchmark; eu has no fitted model, so eight-row all-NA placeholders keep
# the combined table at the same 24 columns.
poland_lr_bl_lbg <- as.data.frame(predict(ws_poland_lr_bl, rescaling = "lbg"))
poland_lr_bl_mv <- as.data.frame(predict(ws_poland_lr_bl, rescaling = "mv"))
poland_eu_bl_lbg <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
poland_eu_bl_mv <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
poland_ec_bl_lbg <- as.data.frame(predict(ws_poland_ec_bl, rescaling = "lbg"))
poland_ec_bl_mv <- as.data.frame(predict(ws_poland_ec_bl, rescaling = "mv"))
poland_so_bl_lbg <- as.data.frame(predict(ws_poland_so_bl, rescaling = "lbg"))
poland_so_bl_mv <- as.data.frame(predict(ws_poland_so_bl, rescaling = "mv"))

# ches benchmark
poland_lr_ches_lbg <- as.data.frame(predict(ws_poland_lr_ches, rescaling = "lbg"))
poland_lr_ches_mv <- as.data.frame(predict(ws_poland_lr_ches, rescaling = "mv"))
poland_eu_ches_lbg <- as.data.frame(predict(ws_poland_eu_ches, rescaling = "lbg"))
poland_eu_ches_mv <- as.data.frame(predict(ws_poland_eu_ches, rescaling = "mv"))
poland_ec_ches_lbg <- as.data.frame(predict(ws_poland_ec_ches, rescaling = "lbg"))
poland_ec_ches_mv <- as.data.frame(predict(ws_poland_ec_ches, rescaling = "mv"))
poland_so_ches_lbg <- as.data.frame(predict(ws_poland_so_ches, rescaling = "lbg"))
poland_so_ches_mv <- as.data.frame(predict(ws_poland_so_ches, rescaling = "mv"))

# emp benchmark
poland_lr_emp_lbg <- as.data.frame(predict(ws_poland_lr_emp, rescaling = "lbg"))
poland_lr_emp_mv <- as.data.frame(predict(ws_poland_lr_emp, rescaling = "mv"))
poland_eu_emp_lbg <- as.data.frame(predict(ws_poland_eu_emp, rescaling = "lbg"))
poland_eu_emp_mv <- as.data.frame(predict(ws_poland_eu_emp, rescaling = "mv"))
poland_ec_emp_lbg <- as.data.frame(predict(ws_poland_ec_emp, rescaling = "lbg"))
poland_ec_emp_mv <- as.data.frame(predict(ws_poland_ec_emp, rescaling = "mv"))
poland_so_emp_lbg <- as.data.frame(predict(ws_poland_so_emp, rescaling = "lbg"))
poland_so_emp_mv <- as.data.frame(predict(ws_poland_so_emp, rescaling = "mv"))

# Give the first column of each prediction table a country-independent label
# of the form <benchmark>_<dimension>_<rescaling>.

# lbg rescaling
colnames(poland_lr_bl_lbg)[1] <- "bl_lr_lbg"
colnames(poland_eu_bl_lbg)[1] <- "bl_eu_lbg"
colnames(poland_ec_bl_lbg)[1] <- "bl_ec_lbg"
colnames(poland_so_bl_lbg)[1] <- "bl_so_lbg"

colnames(poland_lr_ches_lbg)[1] <- "ches_lr_lbg"
colnames(poland_eu_ches_lbg)[1] <- "ches_eu_lbg"
colnames(poland_ec_ches_lbg)[1] <- "ches_ec_lbg"
colnames(poland_so_ches_lbg)[1] <- "ches_so_lbg"

colnames(poland_lr_emp_lbg)[1] <- "emp_lr_lbg"
colnames(poland_eu_emp_lbg)[1] <- "emp_eu_lbg"
colnames(poland_ec_emp_lbg)[1] <- "emp_ec_lbg"
colnames(poland_so_emp_lbg)[1] <- "emp_so_lbg"

# mv rescaling
colnames(poland_lr_bl_mv)[1] <- "bl_lr_mv"
colnames(poland_eu_bl_mv)[1] <- "bl_eu_mv"
colnames(poland_ec_bl_mv)[1] <- "bl_ec_mv"
colnames(poland_so_bl_mv)[1] <- "bl_so_mv"

colnames(poland_lr_ches_mv)[1] <- "ches_lr_mv"
colnames(poland_eu_ches_mv)[1] <- "ches_eu_mv"
colnames(poland_ec_ches_mv)[1] <- "ches_ec_mv"
colnames(poland_so_ches_mv)[1] <- "ches_so_mv"

colnames(poland_lr_emp_mv)[1] <- "emp_lr_mv"
colnames(poland_eu_emp_mv)[1] <- "emp_eu_mv"
colnames(poland_ec_emp_mv)[1] <- "emp_ec_mv"
colnames(poland_so_emp_mv)[1] <- "emp_so_mv"

# Assemble the 24 Polish tables (predictions plus the all-NA bl/eu
# placeholders) into one table. The column order defines the CSV layout and
# is therefore left unchanged.
poland_wordscores <- cbind(
  poland_lr_bl_lbg, poland_eu_bl_lbg, poland_ec_bl_lbg, poland_so_bl_lbg,
  poland_lr_ches_lbg, poland_eu_ches_lbg, poland_ec_ches_lbg, poland_so_ches_lbg,
  poland_lr_emp_lbg, poland_eu_emp_lbg, poland_ec_emp_lbg, poland_so_emp_lbg,
  poland_lr_bl_mv, poland_eu_bl_mv, poland_ec_bl_mv, poland_so_bl_mv,
  poland_lr_ches_mv, poland_eu_ches_mv, poland_ec_ches_mv, poland_so_ches_mv,
  poland_lr_emp_mv, poland_eu_emp_mv, poland_ec_emp_mv, poland_so_emp_mv
)
# Round-trip through a matrix so that all columns share a single type.
poland_wordscores <- as.matrix(poland_wordscores)
poland_wordscores <- as.data.frame(poland_wordscores)

# Write to the output folder through an explicit path instead of switching
# the working directory there and back. The folder is created on first use,
# so the export no longer fails when "Wordscores" does not exist yet.
dir.create("~/Downloads/Replication Files/Wordscores", showWarnings = FALSE, recursive = TRUE)
write.csv(
  poland_wordscores,
  file = file.path("~/Downloads/Replication Files/Wordscores", "poland_wordscores.csv")
)
# Kept: the manifesto paths used further down are relative to this folder.
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Portugal ################################################################################

# Extract the text of every Portuguese manifesto with pdf_text() and save it
# as a .txt file next to its PDF right after extraction.

# 2004
BE_04 <- pdf_text("Portugal/BE_04.pdf")
write(BE_04, file = "Portugal/BE_04.txt")
CDU_04 <- pdf_text("Portugal/CDU_04.pdf")
write(CDU_04, file = "Portugal/CDU_04.txt")
PS_04 <- pdf_text("Portugal/PS_04.pdf")
write(PS_04, file = "Portugal/PS_04.txt")
PSD_04 <- pdf_text("Portugal/PSD_04.pdf")
write(PSD_04, file = "Portugal/PSD_04.txt")

# 2009
BE_09 <- pdf_text("Portugal/BE_09.pdf")
write(BE_09, file = "Portugal/BE_09.txt")
CDU_09 <- pdf_text("Portugal/CDU_09.pdf")
write(CDU_09, file = "Portugal/CDU_09.txt")
PS_09 <- pdf_text("Portugal/PS_09.pdf")
write(PS_09, file = "Portugal/PS_09.txt")
PSD_09 <- pdf_text("Portugal/PSD_09.pdf")
write(PSD_09, file = "Portugal/PSD_09.txt")

# Read the saved .txt manifestos back in and build a document-feature matrix.
# The raw dfm is kept under its own name for the size counts further down.
portugal_texts <- readtext("Portugal/*.txt")
portugal_corpus <- corpus(portugal_texts)
portugal_dfm <- dfm(portugal_corpus)
is.dfm(portugal_dfm)

# Cleaning

# Start from a lower-cased copy of the raw dfm, then drop features matching
# each POSIX character class in turn (same order and options throughout).
portugal <- Reduce(
  function(acc, char_class) {
    dfm_select(acc, char_class, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  dfm_tolower(portugal_dfm)
)

# Remove Portuguese stopwords (stopwords-iso list), matched as fixed strings.
portugal <- dfm_select(
  portugal,
  stopwords(language = "pt", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Token and type counts per document, taken from the uncleaned dfm.
portugal_dfm_words <- as.data.frame(ntoken(portugal_dfm))
portugal_dfm_uniquewords <- as.data.frame(ntype(portugal_dfm))

# Run Wordscores

# Portugal

# Show the document order; the score vectors that follow are positional.
portugal@Dimnames$docs
# Reference scores for Portugal: one vector per benchmark (bl, ches, emp) and
# dimension (lr, eu, ec, so). Entries are positional, one per document in the
# dfm; NA means no reference score is supplied. The ches vectors also have NA
# in their first slot.
# NOTE(review): presumably the documents are in file-name order (BE_04,
# BE_09, CDU_04, ...) - confirm against the docnames printed just before.
scores_portugal_lr_bl <- c(2.952380895615, NA, 4.150000095367, NA, 8.666666984558, NA, 13.85714244843, NA)
scores_portugal_eu_bl <- c(7.8499999, NA, 7.8666701, NA, 14.3, NA, 11.6, NA)
scores_portugal_ec_bl <- c(5.0999999, NA, 4.6151299, NA, 8.6499996, NA, 14.5, NA)
scores_portugal_so_bl <- c(1.7619047, NA, 4.0952382, NA, 8.0952377, NA, 14.85, NA)

scores_portugal_lr_ches <- c(NA, NA, 1.570000052452, NA, 4, NA, 6.429999828339, NA)
scores_portugal_eu_ches <- c(NA, NA, 2.710000038147, NA, 6.860000133514, NA, 6.429999828339, NA)
scores_portugal_ec_ches <- c(NA, NA, 1.14, NA, 3.8599999, NA, 7.29, NA)
scores_portugal_so_ches <- c(NA, NA, 2.4300001, NA, 3.4300001, NA, 7, NA)

scores_portugal_lr_emp <- c(2.001, NA, 2, NA, 4, NA, 7, NA)
scores_portugal_eu_emp <- c(6, NA, 2, NA, 9, NA, 7, NA)
scores_portugal_ec_emp <- c(3, NA, 2, NA, 4, NA, 8, NA)
scores_portugal_so_emp <- c(2, NA, 8, NA, 4, NA, 7, NA)

# Fit one Wordscores model per benchmark/dimension pair on the cleaned
# Portuguese dfm, using the matching reference-score vector.

# bl benchmark
ws_portugal_lr_bl <- textmodel_wordscores(x = portugal, y = scores_portugal_lr_bl)
ws_portugal_eu_bl <- textmodel_wordscores(x = portugal, y = scores_portugal_eu_bl)
ws_portugal_ec_bl <- textmodel_wordscores(x = portugal, y = scores_portugal_ec_bl)
ws_portugal_so_bl <- textmodel_wordscores(x = portugal, y = scores_portugal_so_bl)

# ches benchmark
ws_portugal_lr_ches <- textmodel_wordscores(x = portugal, y = scores_portugal_lr_ches)
ws_portugal_eu_ches <- textmodel_wordscores(x = portugal, y = scores_portugal_eu_ches)
ws_portugal_ec_ches <- textmodel_wordscores(x = portugal, y = scores_portugal_ec_ches)
ws_portugal_so_ches <- textmodel_wordscores(x = portugal, y = scores_portugal_so_ches)

# emp benchmark
ws_portugal_lr_emp <- textmodel_wordscores(x = portugal, y = scores_portugal_lr_emp)
ws_portugal_eu_emp <- textmodel_wordscores(x = portugal, y = scores_portugal_eu_emp)
ws_portugal_ec_emp <- textmodel_wordscores(x = portugal, y = scores_portugal_ec_emp)
ws_portugal_so_emp <- textmodel_wordscores(x = portugal, y = scores_portugal_so_emp)


# Predict with every fitted model, once with "lbg" and once with "mv"
# rescaling. Each result is stored as a data frame whose first column is
# renamed to <benchmark>_<dimension>_<rescaling>.

# LBG rescaling, bl
portugal_lr_bl_lbg <- as.data.frame(predict(ws_portugal_lr_bl, rescaling = "lbg"))
names(portugal_lr_bl_lbg)[1] <- "bl_lr_lbg"
portugal_eu_bl_lbg <- as.data.frame(predict(ws_portugal_eu_bl, rescaling = "lbg"))
names(portugal_eu_bl_lbg)[1] <- "bl_eu_lbg"
portugal_ec_bl_lbg <- as.data.frame(predict(ws_portugal_ec_bl, rescaling = "lbg"))
names(portugal_ec_bl_lbg)[1] <- "bl_ec_lbg"
portugal_so_bl_lbg <- as.data.frame(predict(ws_portugal_so_bl, rescaling = "lbg"))
names(portugal_so_bl_lbg)[1] <- "bl_so_lbg"

# LBG rescaling, ches
portugal_lr_ches_lbg <- as.data.frame(predict(ws_portugal_lr_ches, rescaling = "lbg"))
names(portugal_lr_ches_lbg)[1] <- "ches_lr_lbg"
portugal_eu_ches_lbg <- as.data.frame(predict(ws_portugal_eu_ches, rescaling = "lbg"))
names(portugal_eu_ches_lbg)[1] <- "ches_eu_lbg"
portugal_ec_ches_lbg <- as.data.frame(predict(ws_portugal_ec_ches, rescaling = "lbg"))
names(portugal_ec_ches_lbg)[1] <- "ches_ec_lbg"
portugal_so_ches_lbg <- as.data.frame(predict(ws_portugal_so_ches, rescaling = "lbg"))
names(portugal_so_ches_lbg)[1] <- "ches_so_lbg"

# LBG rescaling, emp
portugal_lr_emp_lbg <- as.data.frame(predict(ws_portugal_lr_emp, rescaling = "lbg"))
names(portugal_lr_emp_lbg)[1] <- "emp_lr_lbg"
portugal_eu_emp_lbg <- as.data.frame(predict(ws_portugal_eu_emp, rescaling = "lbg"))
names(portugal_eu_emp_lbg)[1] <- "emp_eu_lbg"
portugal_ec_emp_lbg <- as.data.frame(predict(ws_portugal_ec_emp, rescaling = "lbg"))
names(portugal_ec_emp_lbg)[1] <- "emp_ec_lbg"
portugal_so_emp_lbg <- as.data.frame(predict(ws_portugal_so_emp, rescaling = "lbg"))
names(portugal_so_emp_lbg)[1] <- "emp_so_lbg"

# MV rescaling, bl
portugal_lr_bl_mv <- as.data.frame(predict(ws_portugal_lr_bl, rescaling = "mv"))
names(portugal_lr_bl_mv)[1] <- "bl_lr_mv"
portugal_eu_bl_mv <- as.data.frame(predict(ws_portugal_eu_bl, rescaling = "mv"))
names(portugal_eu_bl_mv)[1] <- "bl_eu_mv"
portugal_ec_bl_mv <- as.data.frame(predict(ws_portugal_ec_bl, rescaling = "mv"))
names(portugal_ec_bl_mv)[1] <- "bl_ec_mv"
portugal_so_bl_mv <- as.data.frame(predict(ws_portugal_so_bl, rescaling = "mv"))
names(portugal_so_bl_mv)[1] <- "bl_so_mv"

# MV rescaling, ches
portugal_lr_ches_mv <- as.data.frame(predict(ws_portugal_lr_ches, rescaling = "mv"))
names(portugal_lr_ches_mv)[1] <- "ches_lr_mv"
portugal_eu_ches_mv <- as.data.frame(predict(ws_portugal_eu_ches, rescaling = "mv"))
names(portugal_eu_ches_mv)[1] <- "ches_eu_mv"
portugal_ec_ches_mv <- as.data.frame(predict(ws_portugal_ec_ches, rescaling = "mv"))
names(portugal_ec_ches_mv)[1] <- "ches_ec_mv"
portugal_so_ches_mv <- as.data.frame(predict(ws_portugal_so_ches, rescaling = "mv"))
names(portugal_so_ches_mv)[1] <- "ches_so_mv"

# MV rescaling, emp
portugal_lr_emp_mv <- as.data.frame(predict(ws_portugal_lr_emp, rescaling = "mv"))
names(portugal_lr_emp_mv)[1] <- "emp_lr_mv"
portugal_eu_emp_mv <- as.data.frame(predict(ws_portugal_eu_emp, rescaling = "mv"))
names(portugal_eu_emp_mv)[1] <- "emp_eu_mv"
portugal_ec_emp_mv <- as.data.frame(predict(ws_portugal_ec_emp, rescaling = "mv"))
names(portugal_ec_emp_mv)[1] <- "emp_ec_mv"
portugal_so_emp_mv <- as.data.frame(predict(ws_portugal_so_emp, rescaling = "mv"))
names(portugal_so_emp_mv)[1] <- "emp_so_mv"

# Bind the 24 prediction data frames side by side: the LBG results first
# (bl, ches, emp; each lr, eu, ec, so), followed by the MV results.
portugal_wordscores <- cbind(
  portugal_lr_bl_lbg, portugal_eu_bl_lbg, portugal_ec_bl_lbg, portugal_so_bl_lbg,
  portugal_lr_ches_lbg, portugal_eu_ches_lbg, portugal_ec_ches_lbg, portugal_so_ches_lbg,
  portugal_lr_emp_lbg, portugal_eu_emp_lbg, portugal_ec_emp_lbg, portugal_so_emp_lbg,
  portugal_lr_bl_mv, portugal_eu_bl_mv, portugal_ec_bl_mv, portugal_so_bl_mv,
  portugal_lr_ches_mv, portugal_eu_ches_mv, portugal_ec_ches_mv, portugal_so_ches_mv,
  portugal_lr_emp_mv, portugal_eu_emp_mv, portugal_ec_emp_mv, portugal_so_emp_mv
)

# Round-trip through a matrix before converting back to a data frame.
portugal_wordscores <- as.data.frame(as.matrix(portugal_wordscores))

# Write the table into the Wordscores folder, then switch back to the
# Manifestos folder for the next country.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(portugal_wordscores, file = "portugal_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Slovakia ################################################################################

# Extract the text of each Slovak manifesto PDF (_04 files, then _09 files).
KDH_04 <- pdf_text(file.path("Slovakia", "KDH_04.pdf"))
LSHZDS_04 <- pdf_text(file.path("Slovakia", "LSHZDS_04.pdf"))
SDKUDS_04 <- pdf_text(file.path("Slovakia", "SDKUDS_04.pdf"))
SMERSD_04 <- pdf_text(file.path("Slovakia", "SMERSD_04.pdf"))
SMKMKP_04 <- pdf_text(file.path("Slovakia", "SMKMKP_04.pdf"))

KDH_09 <- pdf_text(file.path("Slovakia", "KDH_09.pdf"))
LSHZDS_09 <- pdf_text(file.path("Slovakia", "LSHZDS_09.pdf"))
SDKUDS_09 <- pdf_text(file.path("Slovakia", "SDKUDS_09.pdf"))
SMERSD_09 <- pdf_text(file.path("Slovakia", "SMERSD_09.pdf"))
SMKMKP_09 <- pdf_text(file.path("Slovakia", "SMKMKP_09.pdf"))

# Save every extracted text as a .txt file next to its PDF.
write(x = KDH_04, file = file.path("Slovakia", "KDH_04.txt"))
write(x = LSHZDS_04, file = file.path("Slovakia", "LSHZDS_04.txt"))
write(x = SDKUDS_04, file = file.path("Slovakia", "SDKUDS_04.txt"))
write(x = SMERSD_04, file = file.path("Slovakia", "SMERSD_04.txt"))
write(x = SMKMKP_04, file = file.path("Slovakia", "SMKMKP_04.txt"))
write(x = KDH_09, file = file.path("Slovakia", "KDH_09.txt"))
write(x = LSHZDS_09, file = file.path("Slovakia", "LSHZDS_09.txt"))
write(x = SDKUDS_09, file = file.path("Slovakia", "SDKUDS_09.txt"))
write(x = SMERSD_09, file = file.path("Slovakia", "SMERSD_09.txt"))
write(x = SMKMKP_09, file = file.path("Slovakia", "SMKMKP_09.txt"))

# Read all .txt files of the folder back in and build the document-feature
# matrix from them.
slovakia_texts <- readtext(file.path("Slovakia", "*.txt"))
slovakia_corpus <- corpus(slovakia_texts)
slovakia_dfm <- dfm(slovakia_corpus)
is.dfm(slovakia_dfm)

# Working copy used for cleaning; `slovakia_dfm` itself is not modified.
slovakia <- slovakia_dfm

# Cleaning

slovakia <- dfm_tolower(slovakia)

# Drop features matching punctuation, digit, control, whitespace or blank
# character classes; the filters run one after another in this order.
slovakia <- Reduce(
  function(mat, rx) {
    dfm_select(mat, rx, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  slovakia
)

# Drop Slovak stop words (fixed matching against the stopwords-iso list).
slovakia <- dfm_select(
  slovakia,
  stopwords(language = "sk", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Per-document token and type counts. Note that these are computed on the
# unfiltered `slovakia_dfm`, not on the cleaned `slovakia` object.
slovakia_dfm_words <- as.data.frame(ntoken(slovakia_dfm))
slovakia_dfm_uniquewords <- as.data.frame(ntype(slovakia_dfm))

# Run Wordscores

# Slovakia

# Show the document order: the score vectors below are positional and must
# line up with it. NA entries are documents without a reference score.
slovakia@Dimnames[["docs"]]

# Benchmark "bl": lr, ec, so only (no eu vector is defined for Slovakia)
scores_slovakia_lr_bl <- c(17.0625, NA, 10.88235282898, NA, 15.625, NA, 7.6875, NA, 13.60000038147, NA)
scores_slovakia_ec_bl <- c(14.375, NA, 10.176471, NA, 13.882353, NA, 9.4117651, NA, 13.125, NA)
scores_slovakia_so_bl <- c(19.705883, NA, 9.4705887, NA, 14.470589, NA, 4.9411764, NA, 15, NA)

# Benchmark "ches"
scores_slovakia_lr_ches <- c(7.639999866486, NA, 4.380000114441, NA, 7.070000171661, NA, 3.539999961853, NA, 6.789999961853, NA)
scores_slovakia_eu_ches <- c(5.5, NA, 5.5, NA, 7, NA, 5.360000133514, NA, 6.860000133514, NA)
scores_slovakia_ec_ches <- c(7.0799999, NA, 4.1700001, NA, 7.25, NA, 3.75, NA, 5.6700001, NA)
scores_slovakia_so_ches <- c(7.8299999, NA, 7.1500001, NA, 3.54, NA, 4.46, NA, 5.23, NA)

# Benchmark "emp"
scores_slovakia_lr_emp <- c(8.001, NA, 6, NA, 8, NA, 4, NA, 7, NA)
scores_slovakia_eu_emp <- c(3, NA, 8.0001, NA, 7, NA, 8.001, NA, 8.00001, NA)
scores_slovakia_ec_emp <- c(8.001, NA, 5, NA, 8, NA, 4, NA, 6, NA)
scores_slovakia_so_emp <- c(8, NA, 8.001, NA, 4, NA, 3.5, NA, 6, NA)

# Fit one Wordscores model per available benchmark/dimension pair, all on the
# cleaned `slovakia` dfm. There is no bl model for the eu dimension.

# bl
ws_slovakia_lr_bl <- textmodel_wordscores(x = slovakia, y = scores_slovakia_lr_bl)
ws_slovakia_ec_bl <- textmodel_wordscores(x = slovakia, y = scores_slovakia_ec_bl)
ws_slovakia_so_bl <- textmodel_wordscores(x = slovakia, y = scores_slovakia_so_bl)

# ches
ws_slovakia_lr_ches <- textmodel_wordscores(x = slovakia, y = scores_slovakia_lr_ches)
ws_slovakia_eu_ches <- textmodel_wordscores(x = slovakia, y = scores_slovakia_eu_ches)
ws_slovakia_ec_ches <- textmodel_wordscores(x = slovakia, y = scores_slovakia_ec_ches)
ws_slovakia_so_ches <- textmodel_wordscores(x = slovakia, y = scores_slovakia_so_ches)

# emp
ws_slovakia_lr_emp <- textmodel_wordscores(x = slovakia, y = scores_slovakia_lr_emp)
ws_slovakia_eu_emp <- textmodel_wordscores(x = slovakia, y = scores_slovakia_eu_emp)
ws_slovakia_ec_emp <- textmodel_wordscores(x = slovakia, y = scores_slovakia_ec_emp)
ws_slovakia_so_emp <- textmodel_wordscores(x = slovakia, y = scores_slovakia_so_emp)


# Predict with every fitted model, once with "lbg" and once with "mv"
# rescaling. Each result is stored as a data frame whose first column is
# renamed to <benchmark>_<dimension>_<rescaling>. The bl/eu slot has no model,
# so it is filled with a 10-row all-NA placeholder column.

# LBG rescaling, bl
slovakia_lr_bl_lbg <- as.data.frame(predict(ws_slovakia_lr_bl, rescaling = "lbg"))
names(slovakia_lr_bl_lbg)[1] <- "bl_lr_lbg"
slovakia_eu_bl_lbg <- data.frame(y = rep(NA, 10), stringsAsFactors = FALSE)
names(slovakia_eu_bl_lbg)[1] <- "bl_eu_lbg"
slovakia_ec_bl_lbg <- as.data.frame(predict(ws_slovakia_ec_bl, rescaling = "lbg"))
names(slovakia_ec_bl_lbg)[1] <- "bl_ec_lbg"
slovakia_so_bl_lbg <- as.data.frame(predict(ws_slovakia_so_bl, rescaling = "lbg"))
names(slovakia_so_bl_lbg)[1] <- "bl_so_lbg"

# LBG rescaling, ches
slovakia_lr_ches_lbg <- as.data.frame(predict(ws_slovakia_lr_ches, rescaling = "lbg"))
names(slovakia_lr_ches_lbg)[1] <- "ches_lr_lbg"
slovakia_eu_ches_lbg <- as.data.frame(predict(ws_slovakia_eu_ches, rescaling = "lbg"))
names(slovakia_eu_ches_lbg)[1] <- "ches_eu_lbg"
slovakia_ec_ches_lbg <- as.data.frame(predict(ws_slovakia_ec_ches, rescaling = "lbg"))
names(slovakia_ec_ches_lbg)[1] <- "ches_ec_lbg"
slovakia_so_ches_lbg <- as.data.frame(predict(ws_slovakia_so_ches, rescaling = "lbg"))
names(slovakia_so_ches_lbg)[1] <- "ches_so_lbg"

# LBG rescaling, emp
slovakia_lr_emp_lbg <- as.data.frame(predict(ws_slovakia_lr_emp, rescaling = "lbg"))
names(slovakia_lr_emp_lbg)[1] <- "emp_lr_lbg"
slovakia_eu_emp_lbg <- as.data.frame(predict(ws_slovakia_eu_emp, rescaling = "lbg"))
names(slovakia_eu_emp_lbg)[1] <- "emp_eu_lbg"
slovakia_ec_emp_lbg <- as.data.frame(predict(ws_slovakia_ec_emp, rescaling = "lbg"))
names(slovakia_ec_emp_lbg)[1] <- "emp_ec_lbg"
slovakia_so_emp_lbg <- as.data.frame(predict(ws_slovakia_so_emp, rescaling = "lbg"))
names(slovakia_so_emp_lbg)[1] <- "emp_so_lbg"

# MV rescaling, bl
slovakia_lr_bl_mv <- as.data.frame(predict(ws_slovakia_lr_bl, rescaling = "mv"))
names(slovakia_lr_bl_mv)[1] <- "bl_lr_mv"
slovakia_eu_bl_mv <- data.frame(y = rep(NA, 10), stringsAsFactors = FALSE)
names(slovakia_eu_bl_mv)[1] <- "bl_eu_mv"
slovakia_ec_bl_mv <- as.data.frame(predict(ws_slovakia_ec_bl, rescaling = "mv"))
names(slovakia_ec_bl_mv)[1] <- "bl_ec_mv"
slovakia_so_bl_mv <- as.data.frame(predict(ws_slovakia_so_bl, rescaling = "mv"))
names(slovakia_so_bl_mv)[1] <- "bl_so_mv"

# MV rescaling, ches
slovakia_lr_ches_mv <- as.data.frame(predict(ws_slovakia_lr_ches, rescaling = "mv"))
names(slovakia_lr_ches_mv)[1] <- "ches_lr_mv"
slovakia_eu_ches_mv <- as.data.frame(predict(ws_slovakia_eu_ches, rescaling = "mv"))
names(slovakia_eu_ches_mv)[1] <- "ches_eu_mv"
slovakia_ec_ches_mv <- as.data.frame(predict(ws_slovakia_ec_ches, rescaling = "mv"))
names(slovakia_ec_ches_mv)[1] <- "ches_ec_mv"
slovakia_so_ches_mv <- as.data.frame(predict(ws_slovakia_so_ches, rescaling = "mv"))
names(slovakia_so_ches_mv)[1] <- "ches_so_mv"

# MV rescaling, emp
slovakia_lr_emp_mv <- as.data.frame(predict(ws_slovakia_lr_emp, rescaling = "mv"))
names(slovakia_lr_emp_mv)[1] <- "emp_lr_mv"
slovakia_eu_emp_mv <- as.data.frame(predict(ws_slovakia_eu_emp, rescaling = "mv"))
names(slovakia_eu_emp_mv)[1] <- "emp_eu_mv"
slovakia_ec_emp_mv <- as.data.frame(predict(ws_slovakia_ec_emp, rescaling = "mv"))
names(slovakia_ec_emp_mv)[1] <- "emp_ec_mv"
slovakia_so_emp_mv <- as.data.frame(predict(ws_slovakia_so_emp, rescaling = "mv"))
names(slovakia_so_emp_mv)[1] <- "emp_so_mv"

# Bind the 24 prediction data frames side by side: the LBG results first
# (bl, ches, emp; each lr, eu, ec, so), followed by the MV results.
slovakia_wordscores <- cbind(
  slovakia_lr_bl_lbg, slovakia_eu_bl_lbg, slovakia_ec_bl_lbg, slovakia_so_bl_lbg,
  slovakia_lr_ches_lbg, slovakia_eu_ches_lbg, slovakia_ec_ches_lbg, slovakia_so_ches_lbg,
  slovakia_lr_emp_lbg, slovakia_eu_emp_lbg, slovakia_ec_emp_lbg, slovakia_so_emp_lbg,
  slovakia_lr_bl_mv, slovakia_eu_bl_mv, slovakia_ec_bl_mv, slovakia_so_bl_mv,
  slovakia_lr_ches_mv, slovakia_eu_ches_mv, slovakia_ec_ches_mv, slovakia_so_ches_mv,
  slovakia_lr_emp_mv, slovakia_eu_emp_mv, slovakia_ec_emp_mv, slovakia_so_emp_mv
)

# Round-trip through a matrix before converting back to a data frame; this
# also gives the all-NA placeholder columns the same type as the others.
slovakia_wordscores <- as.data.frame(as.matrix(slovakia_wordscores))

# Write the table into the Wordscores folder, then switch back to the
# Manifestos folder for the next country.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(slovakia_wordscores, file = "slovakia_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Slovenia ################################################################################

# NSI_04 and SD_04 go through OCR: pdf_convert() renders the PDF first and
# ocr() is then run on the result with the "slv" engine.
NSI_04 <- pdf_convert(file.path("Slovenia", "NSI_04.pdf"))
NSI_04 <- ocr(NSI_04, engine = "slv")
SD_04 <- pdf_convert(file.path("Slovenia", "SD_04.pdf"))
SD_04 <- ocr(SD_04, engine = "slv")

# The remaining manifestos are read directly with pdf_text().
SDS_04 <- pdf_text(file.path("Slovenia", "SDS_04.pdf"))
SLS_04 <- pdf_text(file.path("Slovenia", "SLS_04.pdf"))

NSI_09 <- pdf_text(file.path("Slovenia", "NSI_09.pdf"))
SD_09 <- pdf_text(file.path("Slovenia", "SD_09.pdf"))
SDS_09 <- pdf_text(file.path("Slovenia", "SDS_09.pdf"))
SLS_09 <- pdf_text(file.path("Slovenia", "SLS_09.pdf"))

# Save every extracted text as a .txt file next to its PDF.
write(x = NSI_04, file = file.path("Slovenia", "NSI_04.txt"))
write(x = SD_04, file = file.path("Slovenia", "SD_04.txt"))
write(x = SDS_04, file = file.path("Slovenia", "SDS_04.txt"))
write(x = SLS_04, file = file.path("Slovenia", "SLS_04.txt"))
write(x = NSI_09, file = file.path("Slovenia", "NSI_09.txt"))
write(x = SD_09, file = file.path("Slovenia", "SD_09.txt"))
write(x = SDS_09, file = file.path("Slovenia", "SDS_09.txt"))
write(x = SLS_09, file = file.path("Slovenia", "SLS_09.txt"))

# Read all .txt files of the folder back in and build the document-feature
# matrix from them.
slovenia_texts <- readtext(file.path("Slovenia", "*.txt"))
slovenia_corpus <- corpus(slovenia_texts)
slovenia_dfm <- dfm(slovenia_corpus)
is.dfm(slovenia_dfm)

# Working copy used for cleaning; `slovenia_dfm` itself is not modified.
slovenia <- slovenia_dfm

# Cleaning

slovenia <- dfm_tolower(slovenia)

# Drop features matching punctuation, digit, control, whitespace or blank
# character classes; the filters run one after another in this order.
slovenia <- Reduce(
  function(mat, rx) {
    dfm_select(mat, rx, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  slovenia
)

# Drop Slovenian stop words (fixed matching against the stopwords-iso list).
slovenia <- dfm_select(
  slovenia,
  stopwords(language = "sl", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Per-document token and type counts. Note that these are computed on the
# unfiltered `slovenia_dfm`, not on the cleaned `slovenia` object.
slovenia_dfm_words <- as.data.frame(ntoken(slovenia_dfm))
slovenia_dfm_uniquewords <- as.data.frame(ntype(slovenia_dfm))


# Run Wordscores

# Slovenia

# Show the document order: the score vectors below are positional and must
# line up with it. NA entries are documents without a reference score.
slovenia@Dimnames$docs

# The order comes from listing the .txt files on disk. "SD_*" and "SDS_*"
# swap places under byte-order collation ("S" sorts before "_"), which would
# silently attach the SD scores to SDS and vice versa. Stop instead of
# producing wrong scores if the order is not the one assumed below.
if (!identical(
  sub("\\.txt$", "", basename(slovenia@Dimnames$docs)),
  c("NSI_04", "NSI_09", "SD_04", "SD_09", "SDS_04", "SDS_09", "SLS_04", "SLS_09")
)) {
  stop(
    "Unexpected document order in 'slovenia': the reference score vectors ",
    "assume NSI, SD, SDS, SLS (each _04 followed by _09).",
    call. = FALSE
  )
}

# Benchmark "bl": lr, ec, so only (no eu vector is defined for Slovenia)
scores_slovenia_lr_bl <- c(16.96666717529, NA, 4.783333301544, NA, 14.67796611786, NA, 14.08333301544, NA)
scores_slovenia_ec_bl <- c(13, NA, 8.2203388, NA, 11.833333, NA, 10.661017, NA)
scores_slovenia_so_bl <- c(17.338984, NA, 5.3559322, NA, 14.65, NA, 15.610169, NA)

# Benchmark "ches"
scores_slovenia_lr_ches <- c(7, NA, 3.599999904633, NA, 6.400000095367, NA, 6.800000190735, NA)
scores_slovenia_eu_ches <- c(6, NA, 5.5, NA, 6.400000095367, NA, 4.800000190735, NA)
scores_slovenia_ec_ches <- c(4.8000002, NA, 4.1999998, NA, 4.8000002, NA, 5, NA)
scores_slovenia_so_ches <- c(7.8000002, NA, 3.8, NA, 5.8000002, NA, 7.1999998, NA)

# Benchmark "emp"
scores_slovenia_lr_emp <- c(9.001, NA, 3, NA, 7, NA, 9, NA)
scores_slovenia_eu_emp <- c(10, NA, 9.99, NA, 9.998, NA, 8, NA)
scores_slovenia_ec_emp <- c(8, NA, 3, NA, 5, NA, 6, NA)
scores_slovenia_so_emp <- c(5.0002, NA, 5.0001, NA, 5.0003, NA, 7, NA)

# Fit one Wordscores model per available benchmark/dimension pair, all on the
# cleaned `slovenia` dfm. There is no bl model for the eu dimension.

# bl
ws_slovenia_lr_bl <- textmodel_wordscores(x = slovenia, y = scores_slovenia_lr_bl)
ws_slovenia_ec_bl <- textmodel_wordscores(x = slovenia, y = scores_slovenia_ec_bl)
ws_slovenia_so_bl <- textmodel_wordscores(x = slovenia, y = scores_slovenia_so_bl)

# ches
ws_slovenia_lr_ches <- textmodel_wordscores(x = slovenia, y = scores_slovenia_lr_ches)
ws_slovenia_eu_ches <- textmodel_wordscores(x = slovenia, y = scores_slovenia_eu_ches)
ws_slovenia_ec_ches <- textmodel_wordscores(x = slovenia, y = scores_slovenia_ec_ches)
ws_slovenia_so_ches <- textmodel_wordscores(x = slovenia, y = scores_slovenia_so_ches)

# emp
ws_slovenia_lr_emp <- textmodel_wordscores(x = slovenia, y = scores_slovenia_lr_emp)
ws_slovenia_eu_emp <- textmodel_wordscores(x = slovenia, y = scores_slovenia_eu_emp)
ws_slovenia_ec_emp <- textmodel_wordscores(x = slovenia, y = scores_slovenia_ec_emp)
ws_slovenia_so_emp <- textmodel_wordscores(x = slovenia, y = scores_slovenia_so_emp)


# Predict with every fitted model, once with "lbg" and once with "mv"
# rescaling. Each result is stored as a data frame whose first column is
# renamed to <benchmark>_<dimension>_<rescaling>. The bl/eu slot has no model,
# so it is filled with an 8-row all-NA placeholder column.

# LBG rescaling, bl
slovenia_lr_bl_lbg <- as.data.frame(predict(ws_slovenia_lr_bl, rescaling = "lbg"))
names(slovenia_lr_bl_lbg)[1] <- "bl_lr_lbg"
slovenia_eu_bl_lbg <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
names(slovenia_eu_bl_lbg)[1] <- "bl_eu_lbg"
slovenia_ec_bl_lbg <- as.data.frame(predict(ws_slovenia_ec_bl, rescaling = "lbg"))
names(slovenia_ec_bl_lbg)[1] <- "bl_ec_lbg"
slovenia_so_bl_lbg <- as.data.frame(predict(ws_slovenia_so_bl, rescaling = "lbg"))
names(slovenia_so_bl_lbg)[1] <- "bl_so_lbg"

# LBG rescaling, ches
slovenia_lr_ches_lbg <- as.data.frame(predict(ws_slovenia_lr_ches, rescaling = "lbg"))
names(slovenia_lr_ches_lbg)[1] <- "ches_lr_lbg"
slovenia_eu_ches_lbg <- as.data.frame(predict(ws_slovenia_eu_ches, rescaling = "lbg"))
names(slovenia_eu_ches_lbg)[1] <- "ches_eu_lbg"
slovenia_ec_ches_lbg <- as.data.frame(predict(ws_slovenia_ec_ches, rescaling = "lbg"))
names(slovenia_ec_ches_lbg)[1] <- "ches_ec_lbg"
slovenia_so_ches_lbg <- as.data.frame(predict(ws_slovenia_so_ches, rescaling = "lbg"))
names(slovenia_so_ches_lbg)[1] <- "ches_so_lbg"

# LBG rescaling, emp
slovenia_lr_emp_lbg <- as.data.frame(predict(ws_slovenia_lr_emp, rescaling = "lbg"))
names(slovenia_lr_emp_lbg)[1] <- "emp_lr_lbg"
slovenia_eu_emp_lbg <- as.data.frame(predict(ws_slovenia_eu_emp, rescaling = "lbg"))
names(slovenia_eu_emp_lbg)[1] <- "emp_eu_lbg"
slovenia_ec_emp_lbg <- as.data.frame(predict(ws_slovenia_ec_emp, rescaling = "lbg"))
names(slovenia_ec_emp_lbg)[1] <- "emp_ec_lbg"
slovenia_so_emp_lbg <- as.data.frame(predict(ws_slovenia_so_emp, rescaling = "lbg"))
names(slovenia_so_emp_lbg)[1] <- "emp_so_lbg"

# MV rescaling, bl
slovenia_lr_bl_mv <- as.data.frame(predict(ws_slovenia_lr_bl, rescaling = "mv"))
names(slovenia_lr_bl_mv)[1] <- "bl_lr_mv"
slovenia_eu_bl_mv <- data.frame(y = rep(NA, 8), stringsAsFactors = FALSE)
names(slovenia_eu_bl_mv)[1] <- "bl_eu_mv"
slovenia_ec_bl_mv <- as.data.frame(predict(ws_slovenia_ec_bl, rescaling = "mv"))
names(slovenia_ec_bl_mv)[1] <- "bl_ec_mv"
slovenia_so_bl_mv <- as.data.frame(predict(ws_slovenia_so_bl, rescaling = "mv"))
names(slovenia_so_bl_mv)[1] <- "bl_so_mv"

# MV rescaling, ches
slovenia_lr_ches_mv <- as.data.frame(predict(ws_slovenia_lr_ches, rescaling = "mv"))
names(slovenia_lr_ches_mv)[1] <- "ches_lr_mv"
slovenia_eu_ches_mv <- as.data.frame(predict(ws_slovenia_eu_ches, rescaling = "mv"))
names(slovenia_eu_ches_mv)[1] <- "ches_eu_mv"
slovenia_ec_ches_mv <- as.data.frame(predict(ws_slovenia_ec_ches, rescaling = "mv"))
names(slovenia_ec_ches_mv)[1] <- "ches_ec_mv"
slovenia_so_ches_mv <- as.data.frame(predict(ws_slovenia_so_ches, rescaling = "mv"))
names(slovenia_so_ches_mv)[1] <- "ches_so_mv"

# MV rescaling, emp
slovenia_lr_emp_mv <- as.data.frame(predict(ws_slovenia_lr_emp, rescaling = "mv"))
names(slovenia_lr_emp_mv)[1] <- "emp_lr_mv"
slovenia_eu_emp_mv <- as.data.frame(predict(ws_slovenia_eu_emp, rescaling = "mv"))
names(slovenia_eu_emp_mv)[1] <- "emp_eu_mv"
slovenia_ec_emp_mv <- as.data.frame(predict(ws_slovenia_ec_emp, rescaling = "mv"))
names(slovenia_ec_emp_mv)[1] <- "emp_ec_mv"
slovenia_so_emp_mv <- as.data.frame(predict(ws_slovenia_so_emp, rescaling = "mv"))
names(slovenia_so_emp_mv)[1] <- "emp_so_mv"

# Bind the 24 prediction data frames side by side: the LBG results first
# (bl, ches, emp; each lr, eu, ec, so), followed by the MV results.
slovenia_wordscores <- cbind(
  slovenia_lr_bl_lbg, slovenia_eu_bl_lbg, slovenia_ec_bl_lbg, slovenia_so_bl_lbg,
  slovenia_lr_ches_lbg, slovenia_eu_ches_lbg, slovenia_ec_ches_lbg, slovenia_so_ches_lbg,
  slovenia_lr_emp_lbg, slovenia_eu_emp_lbg, slovenia_ec_emp_lbg, slovenia_so_emp_lbg,
  slovenia_lr_bl_mv, slovenia_eu_bl_mv, slovenia_ec_bl_mv, slovenia_so_bl_mv,
  slovenia_lr_ches_mv, slovenia_eu_ches_mv, slovenia_ec_ches_mv, slovenia_so_ches_mv,
  slovenia_lr_emp_mv, slovenia_eu_emp_mv, slovenia_ec_emp_mv, slovenia_so_emp_mv
)

# Round-trip through a matrix before converting back to a data frame; this
# also gives the all-NA placeholder columns the same type as the others.
slovenia_wordscores <- as.data.frame(as.matrix(slovenia_wordscores))

# Write the table into the Wordscores folder, then switch back to the
# Manifestos folder for the next country.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(slovenia_wordscores, file = "slovenia_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Spain ################################################################################

# Extract the text of each Spanish manifesto PDF (_04 files, then _09 files).
PNVEAJ_04 <- pdf_text(file.path("Spain", "PNVEAJ_04.pdf"))
PP_04 <- pdf_text(file.path("Spain", "PP_04.pdf"))
PSOEPSC_04 <- pdf_text(file.path("Spain", "PSOEPSC_04.pdf"))

PNVEAJ_09 <- pdf_text(file.path("Spain", "PNVEAJ_09.pdf"))
PP_09 <- pdf_text(file.path("Spain", "PP_09.pdf"))
PSOEPSC_09 <- pdf_text(file.path("Spain", "PSOEPSC_09.pdf"))

# Save every extracted text as a .txt file next to its PDF.
write(x = PNVEAJ_04, file = file.path("Spain", "PNVEAJ_04.txt"))
write(x = PP_04, file = file.path("Spain", "PP_04.txt"))
write(x = PSOEPSC_04, file = file.path("Spain", "PSOEPSC_04.txt"))
write(x = PNVEAJ_09, file = file.path("Spain", "PNVEAJ_09.txt"))
write(x = PP_09, file = file.path("Spain", "PP_09.txt"))
write(x = PSOEPSC_09, file = file.path("Spain", "PSOEPSC_09.txt"))

# Read all .txt files of the folder back in and build the document-feature
# matrix from them.
spain_texts <- readtext(file.path("Spain", "*.txt"))
spain_corpus <- corpus(spain_texts)
spain_dfm <- dfm(spain_corpus)
is.dfm(spain_dfm)

# Working copy used for cleaning; `spain_dfm` itself is not modified.
spain <- spain_dfm

# Cleaning

spain <- dfm_tolower(spain)

# Drop features matching punctuation, digit, control, whitespace or blank
# character classes; the filters run one after another in this order.
spain <- Reduce(
  function(mat, rx) {
    dfm_select(mat, rx, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  spain
)

# Drop Spanish stop words (fixed matching against the stopwords-iso list).
spain <- dfm_select(
  spain,
  stopwords(language = "es", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Per-document token and type counts. Note that these are computed on the
# unfiltered `spain_dfm`, not on the cleaned `spain` object.
spain_dfm_words <- as.data.frame(ntoken(spain_dfm))
spain_dfm_uniquewords <- as.data.frame(ntype(spain_dfm))

# Run Wordscores

# Spain

# Show the document order: the score vectors below are positional and must
# line up with it. NA entries are documents without a reference score.
spain@Dimnames[["docs"]]

# Benchmark "bl": lr, eu, ec, so dimensions
scores_spain_lr_bl <- c(14.5, NA, 16.98684120178, NA, 8.197368621826, NA)
scores_spain_eu_bl <- c(12.706897, NA, 8.3906202, NA, 14.174603, NA)
scores_spain_ec_bl <- c(11.318841, NA, 16.657894, NA, 7.3600001, NA)
scores_spain_so_bl <- c(13.930555, NA, 17.157894, NA, 5.5657897, NA)

# Benchmark "ches"
scores_spain_lr_ches <- c(5.849999904633, NA, 6.920000076294, NA, 4, NA)
scores_spain_eu_ches <- c(6.079999923706, NA, 6.230000019073, NA, 6.69000005722, NA)
scores_spain_ec_ches <- c(5.9200001, NA, 7.6900001, NA, 4.23, NA)
scores_spain_so_ches <- c(6.5799999, NA, 7.6199999, NA, 3.6900001, NA)

# Benchmark "emp"
scores_spain_lr_emp <- c(7.001, NA, 7, NA, 4, NA)
scores_spain_eu_emp <- c(8.0002, NA, 8.0001, NA, 10, NA)
scores_spain_ec_emp <- c(8, NA, 7, NA, 5, NA)
scores_spain_so_emp <- c(5, NA, 7, NA, 3, NA)

# Fit one Wordscores model per benchmark (bl, ches, emp) and dimension
# (lr, eu, ec, so), all on the cleaned `spain` dfm.

# bl
ws_spain_lr_bl <- textmodel_wordscores(x = spain, y = scores_spain_lr_bl)
ws_spain_eu_bl <- textmodel_wordscores(x = spain, y = scores_spain_eu_bl)
ws_spain_ec_bl <- textmodel_wordscores(x = spain, y = scores_spain_ec_bl)
ws_spain_so_bl <- textmodel_wordscores(x = spain, y = scores_spain_so_bl)

# ches
ws_spain_lr_ches <- textmodel_wordscores(x = spain, y = scores_spain_lr_ches)
ws_spain_eu_ches <- textmodel_wordscores(x = spain, y = scores_spain_eu_ches)
ws_spain_ec_ches <- textmodel_wordscores(x = spain, y = scores_spain_ec_ches)
ws_spain_so_ches <- textmodel_wordscores(x = spain, y = scores_spain_so_ches)

# emp
ws_spain_lr_emp <- textmodel_wordscores(x = spain, y = scores_spain_lr_emp)
ws_spain_eu_emp <- textmodel_wordscores(x = spain, y = scores_spain_eu_emp)
ws_spain_ec_emp <- textmodel_wordscores(x = spain, y = scores_spain_ec_emp)
ws_spain_so_emp <- textmodel_wordscores(x = spain, y = scores_spain_so_emp)


# Predict with every fitted model, once with "lbg" and once with "mv"
# rescaling. Each result is stored as a data frame whose first column is
# renamed to <benchmark>_<dimension>_<rescaling>.

# LBG rescaling, bl
spain_lr_bl_lbg <- as.data.frame(predict(ws_spain_lr_bl, rescaling = "lbg"))
names(spain_lr_bl_lbg)[1] <- "bl_lr_lbg"
spain_eu_bl_lbg <- as.data.frame(predict(ws_spain_eu_bl, rescaling = "lbg"))
names(spain_eu_bl_lbg)[1] <- "bl_eu_lbg"
spain_ec_bl_lbg <- as.data.frame(predict(ws_spain_ec_bl, rescaling = "lbg"))
names(spain_ec_bl_lbg)[1] <- "bl_ec_lbg"
spain_so_bl_lbg <- as.data.frame(predict(ws_spain_so_bl, rescaling = "lbg"))
names(spain_so_bl_lbg)[1] <- "bl_so_lbg"

# LBG rescaling, ches
spain_lr_ches_lbg <- as.data.frame(predict(ws_spain_lr_ches, rescaling = "lbg"))
names(spain_lr_ches_lbg)[1] <- "ches_lr_lbg"
spain_eu_ches_lbg <- as.data.frame(predict(ws_spain_eu_ches, rescaling = "lbg"))
names(spain_eu_ches_lbg)[1] <- "ches_eu_lbg"
spain_ec_ches_lbg <- as.data.frame(predict(ws_spain_ec_ches, rescaling = "lbg"))
names(spain_ec_ches_lbg)[1] <- "ches_ec_lbg"
spain_so_ches_lbg <- as.data.frame(predict(ws_spain_so_ches, rescaling = "lbg"))
names(spain_so_ches_lbg)[1] <- "ches_so_lbg"

# LBG rescaling, emp
spain_lr_emp_lbg <- as.data.frame(predict(ws_spain_lr_emp, rescaling = "lbg"))
names(spain_lr_emp_lbg)[1] <- "emp_lr_lbg"
spain_eu_emp_lbg <- as.data.frame(predict(ws_spain_eu_emp, rescaling = "lbg"))
names(spain_eu_emp_lbg)[1] <- "emp_eu_lbg"
spain_ec_emp_lbg <- as.data.frame(predict(ws_spain_ec_emp, rescaling = "lbg"))
names(spain_ec_emp_lbg)[1] <- "emp_ec_lbg"
spain_so_emp_lbg <- as.data.frame(predict(ws_spain_so_emp, rescaling = "lbg"))
names(spain_so_emp_lbg)[1] <- "emp_so_lbg"

# MV rescaling, bl
spain_lr_bl_mv <- as.data.frame(predict(ws_spain_lr_bl, rescaling = "mv"))
names(spain_lr_bl_mv)[1] <- "bl_lr_mv"
spain_eu_bl_mv <- as.data.frame(predict(ws_spain_eu_bl, rescaling = "mv"))
names(spain_eu_bl_mv)[1] <- "bl_eu_mv"
spain_ec_bl_mv <- as.data.frame(predict(ws_spain_ec_bl, rescaling = "mv"))
names(spain_ec_bl_mv)[1] <- "bl_ec_mv"
spain_so_bl_mv <- as.data.frame(predict(ws_spain_so_bl, rescaling = "mv"))
names(spain_so_bl_mv)[1] <- "bl_so_mv"

# MV rescaling, ches
spain_lr_ches_mv <- as.data.frame(predict(ws_spain_lr_ches, rescaling = "mv"))
names(spain_lr_ches_mv)[1] <- "ches_lr_mv"
spain_eu_ches_mv <- as.data.frame(predict(ws_spain_eu_ches, rescaling = "mv"))
names(spain_eu_ches_mv)[1] <- "ches_eu_mv"
spain_ec_ches_mv <- as.data.frame(predict(ws_spain_ec_ches, rescaling = "mv"))
names(spain_ec_ches_mv)[1] <- "ches_ec_mv"
spain_so_ches_mv <- as.data.frame(predict(ws_spain_so_ches, rescaling = "mv"))
names(spain_so_ches_mv)[1] <- "ches_so_mv"

# MV rescaling, emp
spain_lr_emp_mv <- as.data.frame(predict(ws_spain_lr_emp, rescaling = "mv"))
names(spain_lr_emp_mv)[1] <- "emp_lr_mv"
spain_eu_emp_mv <- as.data.frame(predict(ws_spain_eu_emp, rescaling = "mv"))
names(spain_eu_emp_mv)[1] <- "emp_eu_mv"
spain_ec_emp_mv <- as.data.frame(predict(ws_spain_ec_emp, rescaling = "mv"))
names(spain_ec_emp_mv)[1] <- "emp_ec_mv"
spain_so_emp_mv <- as.data.frame(predict(ws_spain_so_emp, rescaling = "mv"))
names(spain_so_emp_mv)[1] <- "emp_so_mv"

# Bind the 24 prediction data frames side by side: the LBG results first
# (bl, ches, emp; each lr, eu, ec, so), followed by the MV results.
spain_wordscores <- cbind(
  spain_lr_bl_lbg, spain_eu_bl_lbg, spain_ec_bl_lbg, spain_so_bl_lbg,
  spain_lr_ches_lbg, spain_eu_ches_lbg, spain_ec_ches_lbg, spain_so_ches_lbg,
  spain_lr_emp_lbg, spain_eu_emp_lbg, spain_ec_emp_lbg, spain_so_emp_lbg,
  spain_lr_bl_mv, spain_eu_bl_mv, spain_ec_bl_mv, spain_so_bl_mv,
  spain_lr_ches_mv, spain_eu_ches_mv, spain_ec_ches_mv, spain_so_ches_mv,
  spain_lr_emp_mv, spain_eu_emp_mv, spain_ec_emp_mv, spain_so_emp_mv
)

# Round-trip through a matrix before converting back to a data frame.
spain_wordscores <- as.data.frame(as.matrix(spain_wordscores))

# Write the table into the Wordscores folder, then switch back to the
# Manifestos folder for the next country.
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(spain_wordscores, file = "spain_wordscores.csv")
setwd("~/Downloads/Replication Files/Manifestos/")


################################################################################## Sweden ################################################################################

# Extract the text of each Swedish manifesto PDF (_04 files, then _09 files).
C_04 <- pdf_text(file.path("Sweden", "C_04.pdf"))
KD_04 <- pdf_text(file.path("Sweden", "KD_04.pdf"))
M_04 <- pdf_text(file.path("Sweden", "M_04.pdf"))
MP_04 <- pdf_text(file.path("Sweden", "MP_04.pdf"))
S_04 <- pdf_text(file.path("Sweden", "S_04.pdf"))
V_04 <- pdf_text(file.path("Sweden", "V_04.pdf"))

C_09 <- pdf_text(file.path("Sweden", "C_09.pdf"))
KD_09 <- pdf_text(file.path("Sweden", "KD_09.pdf"))
M_09 <- pdf_text(file.path("Sweden", "M_09.pdf"))
MP_09 <- pdf_text(file.path("Sweden", "MP_09.pdf"))
S_09 <- pdf_text(file.path("Sweden", "S_09.pdf"))
V_09 <- pdf_text(file.path("Sweden", "V_09.pdf"))

# Save every extracted text as a .txt file next to its PDF.
write(x = C_04, file = file.path("Sweden", "C_04.txt"))
write(x = KD_04, file = file.path("Sweden", "KD_04.txt"))
write(x = M_04, file = file.path("Sweden", "M_04.txt"))
write(x = MP_04, file = file.path("Sweden", "MP_04.txt"))
write(x = S_04, file = file.path("Sweden", "S_04.txt"))
write(x = V_04, file = file.path("Sweden", "V_04.txt"))
write(x = C_09, file = file.path("Sweden", "C_09.txt"))
write(x = KD_09, file = file.path("Sweden", "KD_09.txt"))
write(x = M_09, file = file.path("Sweden", "M_09.txt"))
write(x = MP_09, file = file.path("Sweden", "MP_09.txt"))
write(x = S_09, file = file.path("Sweden", "S_09.txt"))
write(x = V_09, file = file.path("Sweden", "V_09.txt"))

# Read all .txt files of the folder back in and build the document-feature
# matrix from them.
sweden_texts <- readtext(file.path("Sweden", "*.txt"))
sweden_corpus <- corpus(sweden_texts)
sweden_dfm <- dfm(sweden_corpus)
is.dfm(sweden_dfm)

# Working copy used for cleaning; `sweden_dfm` itself is not modified.
sweden <- sweden_dfm

# Cleaning

sweden <- dfm_tolower(sweden)

# Drop features matching punctuation, digit, control, whitespace or blank
# character classes; the filters run one after another in this order.
sweden <- Reduce(
  function(mat, rx) {
    dfm_select(mat, rx, selection = "remove", valuetype = "regex", verbose = TRUE)
  },
  c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]"),
  sweden
)

# Drop Swedish stop words (fixed matching against the stopwords-iso list).
sweden <- dfm_select(
  sweden,
  stopwords(language = "sv", source = "stopwords-iso"),
  selection = "remove",
  valuetype = "fixed",
  verbose = TRUE
)

# Per-document token and type counts. Note that these are computed on the
# unfiltered `sweden_dfm`, not on the cleaned `sweden` object.
sweden_dfm_words <- as.data.frame(ntoken(sweden_dfm))
sweden_dfm_uniquewords <- as.data.frame(ntype(sweden_dfm))

# Run Wordscores

# Sweden

# Show the document order: the score vectors below are positional and must
# line up with it. NA entries are documents without a reference score.
sweden@Dimnames$docs

# The order comes from listing the .txt files on disk. "M_*" and "MP_*" swap
# places under byte-order collation ("P" sorts before "_"), which would
# silently attach the M scores to MP and vice versa. Stop instead of
# producing wrong scores if the order is not the one assumed below.
if (!identical(
  sub("\\.txt$", "", basename(sweden@Dimnames$docs)),
  c(
    "C_04", "C_09", "KD_04", "KD_09", "M_04", "M_09",
    "MP_04", "MP_09", "S_04", "S_09", "V_04", "V_09"
  )
)) {
  stop(
    "Unexpected document order in 'sweden': the reference score vectors ",
    "assume C, KD, M, MP, S, V (each _04 followed by _09).",
    call. = FALSE
  )
}

# Benchmark "bl": lr, eu, ec, so dimensions
scores_sweden_lr_bl <- c(12.22388076782, NA, 16.0447769165, NA, 17.19403076172, NA, 7.462686538696, NA, 8.29850769043, NA, 3.626865625381, NA)
scores_sweden_eu_bl <- c(7.5454502, NA, 12.629032, NA, 15.030303, NA, 3.5302999, NA, 12.318182, NA, 3.1194, NA)
scores_sweden_ec_bl <- c(11.439394, NA, 13.727273, NA, 17.686567, NA, 7.1641793, NA, 7.1194029, NA, 3.2686567, NA)
scores_sweden_so_bl <- c(11.646153, NA, 17.746269, NA, 12.686567, NA, 5.0454545, NA, 7.1044774, NA, 3.6119404, NA)

# Benchmark "ches"
scores_sweden_lr_ches <- c(5.579999923706, NA, 7.25, NA, 7.75, NA, 3.329999923706, NA, 3.5, NA, 1.669999957085, NA)
scores_sweden_eu_ches <- c(4.420000076294, NA, 5.920000076294, NA, 6.329999923706, NA, 1.830000042915, NA, 5.5, NA, 2.079999923706, NA)
scores_sweden_ec_ches <- c(5.5799999, NA, 6.9200001, NA, 8.6300001, NA, 3.75, NA, 3.71, NA, 1.63, NA)
scores_sweden_so_ches <- c(6.25, NA, 7.5, NA, 4.6399999, NA, 2.4200001, NA, 4.8299999, NA, 3.1700001, NA)

# Benchmark "emp"
scores_sweden_lr_emp <- c(6, NA, 8.001, NA, 8, NA, 3.001, NA, 4, NA, 3, NA)
scores_sweden_eu_emp <- c(4, NA, 7, NA, 8, NA, 2.001, NA, 6, NA, 2, NA)
scores_sweden_ec_emp <- c(6, NA, 7, NA, 8, NA, 4, NA, 4, NA, 2, NA)
scores_sweden_so_emp <- c(6, NA, 6, NA, 3, NA, 5, NA, 5, NA, 7, NA)

# Fit one Wordscores model per benchmark (bl, ches, emp) and dimension
# (lr, eu, ec, so), all on the cleaned `sweden` dfm.

# bl
ws_sweden_lr_bl <- textmodel_wordscores(x = sweden, y = scores_sweden_lr_bl)
ws_sweden_eu_bl <- textmodel_wordscores(x = sweden, y = scores_sweden_eu_bl)
ws_sweden_ec_bl <- textmodel_wordscores(x = sweden, y = scores_sweden_ec_bl)
ws_sweden_so_bl <- textmodel_wordscores(x = sweden, y = scores_sweden_so_bl)

# ches
ws_sweden_lr_ches <- textmodel_wordscores(x = sweden, y = scores_sweden_lr_ches)
ws_sweden_eu_ches <- textmodel_wordscores(x = sweden, y = scores_sweden_eu_ches)
ws_sweden_ec_ches <- textmodel_wordscores(x = sweden, y = scores_sweden_ec_ches)
ws_sweden_so_ches <- textmodel_wordscores(x = sweden, y = scores_sweden_so_ches)

# emp
ws_sweden_lr_emp <- textmodel_wordscores(x = sweden, y = scores_sweden_lr_emp)
ws_sweden_eu_emp <- textmodel_wordscores(x = sweden, y = scores_sweden_eu_emp)
ws_sweden_ec_emp <- textmodel_wordscores(x = sweden, y = scores_sweden_ec_emp)
ws_sweden_so_emp <- textmodel_wordscores(x = sweden, y = scores_sweden_so_emp)


# Predict with every fitted model, once with "lbg" and once with "mv"
# rescaling. Each result is stored as a data frame whose first column is
# renamed to <benchmark>_<dimension>_<rescaling>.

# LBG rescaling, bl
sweden_lr_bl_lbg <- as.data.frame(predict(ws_sweden_lr_bl, rescaling = "lbg"))
names(sweden_lr_bl_lbg)[1] <- "bl_lr_lbg"
sweden_eu_bl_lbg <- as.data.frame(predict(ws_sweden_eu_bl, rescaling = "lbg"))
names(sweden_eu_bl_lbg)[1] <- "bl_eu_lbg"
sweden_ec_bl_lbg <- as.data.frame(predict(ws_sweden_ec_bl, rescaling = "lbg"))
names(sweden_ec_bl_lbg)[1] <- "bl_ec_lbg"
sweden_so_bl_lbg <- as.data.frame(predict(ws_sweden_so_bl, rescaling = "lbg"))
names(sweden_so_bl_lbg)[1] <- "bl_so_lbg"

# LBG rescaling, ches
sweden_lr_ches_lbg <- as.data.frame(predict(ws_sweden_lr_ches, rescaling = "lbg"))
names(sweden_lr_ches_lbg)[1] <- "ches_lr_lbg"
sweden_eu_ches_lbg <- as.data.frame(predict(ws_sweden_eu_ches, rescaling = "lbg"))
names(sweden_eu_ches_lbg)[1] <- "ches_eu_lbg"
sweden_ec_ches_lbg <- as.data.frame(predict(ws_sweden_ec_ches, rescaling = "lbg"))
names(sweden_ec_ches_lbg)[1] <- "ches_ec_lbg"
sweden_so_ches_lbg <- as.data.frame(predict(ws_sweden_so_ches, rescaling = "lbg"))
names(sweden_so_ches_lbg)[1] <- "ches_so_lbg"

# LBG rescaling, emp
sweden_lr_emp_lbg <- as.data.frame(predict(ws_sweden_lr_emp, rescaling = "lbg"))
names(sweden_lr_emp_lbg)[1] <- "emp_lr_lbg"
sweden_eu_emp_lbg <- as.data.frame(predict(ws_sweden_eu_emp, rescaling = "lbg"))
names(sweden_eu_emp_lbg)[1] <- "emp_eu_lbg"
sweden_ec_emp_lbg <- as.data.frame(predict(ws_sweden_ec_emp, rescaling = "lbg"))
names(sweden_ec_emp_lbg)[1] <- "emp_ec_lbg"
sweden_so_emp_lbg <- as.data.frame(predict(ws_sweden_so_emp, rescaling = "lbg"))
names(sweden_so_emp_lbg)[1] <- "emp_so_lbg"

# MV rescaling, bl
sweden_lr_bl_mv <- as.data.frame(predict(ws_sweden_lr_bl, rescaling = "mv"))
names(sweden_lr_bl_mv)[1] <- "bl_lr_mv"
sweden_eu_bl_mv <- as.data.frame(predict(ws_sweden_eu_bl, rescaling = "mv"))
names(sweden_eu_bl_mv)[1] <- "bl_eu_mv"
sweden_ec_bl_mv <- as.data.frame(predict(ws_sweden_ec_bl, rescaling = "mv"))
names(sweden_ec_bl_mv)[1] <- "bl_ec_mv"
sweden_so_bl_mv <- as.data.frame(predict(ws_sweden_so_bl, rescaling = "mv"))
names(sweden_so_bl_mv)[1] <- "bl_so_mv"

# MV rescaling, ches
sweden_lr_ches_mv <- as.data.frame(predict(ws_sweden_lr_ches, rescaling = "mv"))
names(sweden_lr_ches_mv)[1] <- "ches_lr_mv"
sweden_eu_ches_mv <- as.data.frame(predict(ws_sweden_eu_ches, rescaling = "mv"))
names(sweden_eu_ches_mv)[1] <- "ches_eu_mv"
sweden_ec_ches_mv <- as.data.frame(predict(ws_sweden_ec_ches, rescaling = "mv"))
names(sweden_ec_ches_mv)[1] <- "ches_ec_mv"
sweden_so_ches_mv <- as.data.frame(predict(ws_sweden_so_ches, rescaling = "mv"))
names(sweden_so_ches_mv)[1] <- "ches_so_mv"

# MV rescaling, emp
sweden_lr_emp_mv <- as.data.frame(predict(ws_sweden_lr_emp, rescaling = "mv"))
names(sweden_lr_emp_mv)[1] <- "emp_lr_mv"
sweden_eu_emp_mv <- as.data.frame(predict(ws_sweden_eu_emp, rescaling = "mv"))
names(sweden_eu_emp_mv)[1] <- "emp_eu_mv"
sweden_ec_emp_mv <- as.data.frame(predict(ws_sweden_ec_emp, rescaling = "mv"))
names(sweden_ec_emp_mv)[1] <- "emp_ec_mv"
sweden_so_emp_mv <- as.data.frame(predict(ws_sweden_so_emp, rescaling = "mv"))
names(sweden_so_emp_mv)[1] <- "emp_so_mv"

sweden_wordscores <- cbind(sweden_lr_bl_lbg, sweden_eu_bl_lbg, sweden_ec_bl_lbg, sweden_so_bl_lbg,sweden_lr_ches_lbg, sweden_eu_ches_lbg, sweden_ec_ches_lbg, sweden_so_ches_lbg,sweden_lr_emp_lbg, sweden_eu_emp_lbg, sweden_ec_emp_lbg, sweden_so_emp_lbg,sweden_lr_bl_mv, sweden_eu_bl_mv, sweden_ec_bl_mv, sweden_so_bl_mv,sweden_lr_ches_mv, sweden_eu_ches_mv, sweden_ec_ches_mv, sweden_so_ches_mv,sweden_lr_emp_mv, sweden_eu_emp_mv, sweden_ec_emp_mv, sweden_so_emp_mv)
sweden_wordscores <- as.matrix(sweden_wordscores)
sweden_wordscores <- as.data.frame(sweden_wordscores)
setwd("~/Downloads/Replication Files/Wordscores/")
write.csv(sweden_wordscores, file= "sweden_wordscores.csv")


######################################################################################### Appendix C ##################################################################

setwd("~/Downloads/Replication Files/")

# Give the single column of every per-country word-count object the name
# "count" and of every unique-word-count object the name "unique", so that
# the objects can be stacked with rbind() below. The objects themselves are
# relabelled in place.
appendix_countries <- c(
  "austria", "belgiumfr", "belgiumnl", "cyprus", "czech", "denmark",
  "estonia", "finland", "france", "germany", "greatbritain", "greece",
  "hungary", "ireland", "italy", "latvia", "lithuania", "netherlands",
  "northernireland", "poland", "portugal", "slovakia", "slovenia", "spain",
  "sweden"
)
column_labels <- c(dfm_words = "count", dfm_uniquewords = "unique")

for (object_suffix in names(column_labels)) {
  for (country_name in appendix_countries) {
    object_name <- paste(country_name, object_suffix, sep = "_")
    relabelled <- get(object_name)
    colnames(relabelled) <- column_labels[[object_suffix]]
    assign(object_name, relabelled)
  }
}

# Stack the per-country tables; Great Britain and Northern Ireland come last.
count <- rbind(
  austria_dfm_words, belgiumfr_dfm_words, belgiumnl_dfm_words,
  cyprus_dfm_words, czech_dfm_words, denmark_dfm_words, estonia_dfm_words,
  finland_dfm_words, france_dfm_words, germany_dfm_words, greece_dfm_words,
  hungary_dfm_words, ireland_dfm_words, italy_dfm_words, latvia_dfm_words,
  lithuania_dfm_words, netherlands_dfm_words, poland_dfm_words,
  portugal_dfm_words, slovakia_dfm_words, slovenia_dfm_words,
  spain_dfm_words, sweden_dfm_words, greatbritain_dfm_words,
  northernireland_dfm_words
)
unique <- rbind(
  austria_dfm_uniquewords, belgiumfr_dfm_uniquewords,
  belgiumnl_dfm_uniquewords, cyprus_dfm_uniquewords, czech_dfm_uniquewords,
  denmark_dfm_uniquewords, estonia_dfm_uniquewords, finland_dfm_uniquewords,
  france_dfm_uniquewords, germany_dfm_uniquewords, greece_dfm_uniquewords,
  hungary_dfm_uniquewords, ireland_dfm_uniquewords, italy_dfm_uniquewords,
  latvia_dfm_uniquewords, lithuania_dfm_uniquewords,
  netherlands_dfm_uniquewords, poland_dfm_uniquewords,
  portugal_dfm_uniquewords, slovakia_dfm_uniquewords,
  slovenia_dfm_uniquewords, spain_dfm_uniquewords, sweden_dfm_uniquewords,
  greatbritain_dfm_uniquewords, northernireland_dfm_uniquewords
)

# One row per document with its "count" and "unique" columns, saved to disk.
wordcount <- cbind(count, unique)
write.csv(wordcount, file = "wordcount.csv")

############################################################### Figures 1 & 2 #######################################################

library(plyr)

# Load the citation data; Application and Journal are treated as categories.
citation_data <- read.csv("citation_data.csv")
citation_data$Application <- as.factor(citation_data$Application)
citation_data$Journal <- as.factor(citation_data$Journal)

#Figure 1a

# Rows flagged Application == 1 versus all rows.
fig_citation_applications <- citation_data[which(citation_data$Application == 1), ]
fig_citation_total <- citation_data

# Number of rows per year (plyr::count returns the columns Year and freq).
count_applications <- count(fig_citation_applications, "Year")
count_total <- count(fig_citation_total, "Year")

# Join the two yearly counts on Year instead of pasting them together by
# position: a year that has publications but no applications would otherwise
# make the tables differ in length or pair counts with the wrong year.
count_citation <- merge(
  count_applications, count_total,
  by = "Year", all = TRUE, suffixes = c("_applications", "_total")
)
count_citation <- rename(
  count_citation,
  c("freq_applications" = "applications", "freq_total" = "total")
)
# A year absent from the applications table has zero applications.
count_citation$applications[is.na(count_citation$applications)] <- 0

# Solid line: applications per year; dashed line: all publications per year.
ggplot() +
  geom_line(data = count_citation, aes(Year, applications), linetype = "solid") +
  geom_line(data = count_citation, aes(Year, total), linetype = "dashed") +
  ylab("Number of Publications") +
  scale_x_continuous(breaks = seq(2003, 2017, 2)) +
  annotate("text", label = "Citations", x = 2011, y = 90, size = 4) +
  annotate("text", label = "Applications", x = 2014, y = 25, size = 4) +
  theme_classic()

#Figure 1b

# Among the rows flagged Application == 1, split by the Journal flag
# (1 versus 0) and count rows per year in each group.
fig_peer_applications <- subset(citation_data, Application == 1)
fig_peer_peerreviewed <- subset(fig_peer_applications, Journal == 1)
fig_peer_other <- subset(fig_peer_applications, Journal == 0)

count_peerreviewed <- count(fig_peer_peerreviewed, "Year")
count_other <- count(fig_peer_other, "Year")

# NOTE(review): linetype is set inside aes(), so "longdash" and "solid" act as
# group labels for ggplot2's discrete linetype scale rather than as literal
# line types - confirm the rendered line styles are the intended ones.
# NOTE(review): the annotation labels are the same as in Figure 1a - confirm
# they are the intended labels for these two series.
ggplot() +
  geom_line(
    data = count_peerreviewed,
    mapping = aes(x = Year, y = freq, linetype = "longdash")
  ) +
  geom_line(
    data = count_other,
    mapping = aes(x = Year, y = freq, linetype = "solid")
  ) +
  ylab("Number of Publications") +
  scale_x_continuous(breaks = seq(2003, 2017, 2)) +
  scale_y_continuous(breaks = seq(0, 16, 2)) +
  annotate("text", label = "Citations", x = 2003.5, y = 2, size = 4) +
  annotate("text", label = "Applications", x = 2007.5, y = 2, size = 4) +
  theme_classic() +
  theme(legend.position = "none")

#Figure 2

# Keep columns 2 and 5 of the citation data (used below as Year and Impact)
# and drop rows with a missing value in either of them.
fig_impact_data <- na.omit(citation_data[c(2, 5)])

# Scatter of impact factor by year with a loess trend line (no error band).
ggplot(fig_impact_data, aes(x = Year, y = Impact)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.7, se = FALSE, colour = "black") +
  scale_x_continuous(breaks = seq(2003, 2017, 2)) +
  scale_y_continuous(breaks = seq(0, 6, 1)) +
  ylab("Impact Factor") +
  theme_classic()

######################################################################## Figures 3-6 ############################################################################## 

# This code calculates the data necessary for Figures 3-6 in the article.
# First the CCC is calculated, then Pearson's r in both standardized and unstandardized forms.


### CCC

setwd("~/Downloads/Replication Files/Wordscores/")

# Country label attached to each per-country score file. The order of this
# vector is also the order in which the country tables are stacked below.
country_codes <- c(
  austria = "AT", belgiumfr = "BE(FR)", belgiumnl = "BE(NL)", cyprus = "CY",
  czech = "CZ", denmark = "DK", estonia = "EE", finland = "FI",
  france = "FR", germany = "DE", greece = "GR", hungary = "HU",
  ireland = "IE", italy = "IT", latvia = "LV", lithuania = "LT",
  netherlands = "NL", poland = "PL", portugal = "PT", slovakia = "SK",
  slovenia = "SI", spain = "ES", sweden = "SE", greatbritain = "GB",
  northernireland = "NI"
)

# Min-max rescaling of a numeric vector to the interval [0, 1].
# A constant vector yields NaN (0 / 0) and any NA makes the whole result NA.
rescale_new <- function(x) (x - min(x)) / (max(x) - min(x))

# Read "<name>_wordscores.csv" from the working directory set above (the
# folder the per-country files were written to), rescale every score column
# to [0, 1] within that country, restore the document row names (lost by
# lapply) and add the country label.
read_standardized_scores <- function(name) {
  raw_scores <- read.csv(paste0(name, "_wordscores.csv"), row.names = 1)
  scaled <- as.data.frame(lapply(raw_scores, rescale_new))
  row.names(scaled) <- row.names(raw_scores)
  scaled$country <- rep(country_codes[[name]], nrow(scaled))
  scaled
}

# Stack all countries into one table. The per-country tables are not kept as
# separate objects; the workspace is cleared with rm(list = ls()) at the end
# of this section and nothing in between uses them.
wordscores <- do.call(
  rbind,
  lapply(names(country_codes), read_standardized_scores)
)

# The party label is the part of each row name before the first underscore.
name_parts <- strsplit(row.names(wordscores), "_")
party <- vapply(name_parts, function(parts) parts[1], character(1))

# Rows are taken to alternate 2004, 2009, 2004, 2009, ... The number of
# repetitions follows the table size instead of being hard-coded.
# NOTE(review): this relies on every party contributing its 2004 row directly
# followed by its 2009 row - confirm against the per-country score files.
stopifnot(nrow(wordscores) %% 2 == 0)
year <- rep(c(2004, 2009), length.out = nrow(wordscores))

wordscores <- cbind(party, year, wordscores)
rownames(wordscores) <- NULL

# Keep only the 2009 rows, drop the year column and move country to the front
# so the columns read: country, party, then the score columns.
wordscores <- wordscores[wordscores$year == 2009, ]
wordscores$year <- NULL
score_columns <- setdiff(names(wordscores), c("country", "party"))
wordscores <- wordscores[, c("country", "party", score_columns)]

setwd("~/Downloads/Replication Files/")
write.csv(wordscores, file = "wordscores_standardized.csv", row.names = FALSE)

# Clear the workspace; the following steps start again from the files on disk.
rm(list = ls())

# Add Benchmarks

# Join the standardized Wordscores estimates with the benchmark positions on
# country and party, then move column 27 of the merged table to third place.
wordscores_standardized <- read.csv("wordscores_standardized.csv")
benchmarks <- read.csv("benchmarks.csv")
analysis <- merge(wordscores_standardized, benchmarks, by = c("country", "party"))
analysis <- analysis[, c(1:2, 27, 3:26, 28:39)]

# Every combination that is evaluated. expand.grid() varies its first
# argument fastest, so within each dimension the rows run over reference
# (bl, ches, emp), then rescaling (lbg, mv), then benchmark (ches, emp, eup).
ccc_design <- expand.grid(
  benchmark = c("ches", "emp", "eup"),
  rescaling = c("lbg", "mv"),
  reference = c("bl", "ches", "emp"),
  dimension = c("lr", "eu", "ec", "so"),
  stringsAsFactors = FALSE
)

# Suffix of the benchmark column names, e.g. ches_lr_2010 and emp_lr_2009.
benchmark_year <- c(ches = "2010", emp = "2009", eup = "2009")

# Concordance correlation (estimate, lower and upper 95% bound) between one
# Wordscores column and one benchmark column, followed by the upper-case
# labels of the combination. Returned as a character vector of length 7.
ccc_row <- function(dimension, reference, benchmark, rescaling) {
  estimate_column <- paste(reference, dimension, rescaling, sep = "_")
  benchmark_column <- paste(
    benchmark, dimension, benchmark_year[[benchmark]],
    sep = "_"
  )
  agreement <- epi.ccc(
    analysis[[estimate_column]], analysis[[benchmark_column]],
    ci = "z-transform", conf.level = 0.95
  )
  unlist(c(
    agreement$rho.c,
    toupper(dimension), toupper(reference),
    toupper(benchmark), toupper(rescaling)
  ))
}

ccc_rows <- Map(
  ccc_row,
  ccc_design$dimension, ccc_design$reference,
  ccc_design$benchmark, ccc_design$rescaling
)
names(ccc_rows) <- paste(
  ccc_design$dimension, rep(seq_len(18), times = 4),
  sep = "_"
)

# Also create each row as its own object (lr_1 ... so_18), as before.
invisible(list2env(ccc_rows, envir = environment()))

# One row per combination, with numeric estimates and factor labels.
ccc <- as.data.frame(do.call(rbind, ccc_rows))
colnames(ccc) <- c(
  "ccc", "ccc_lower", "ccc_upper",
  "dimension", "reference", "benchmark", "rescaling"
)
rownames(ccc) <- NULL

estimate_columns <- c("ccc", "ccc_lower", "ccc_upper")
label_columns <- c("dimension", "reference", "benchmark", "rescaling")
ccc[estimate_columns] <- lapply(
  ccc[estimate_columns],
  function(values) as.numeric(as.character(values))
)
ccc[label_columns] <- lapply(
  ccc[label_columns],
  function(values) as.factor(as.character(values))
)

### Pearson's r

setwd("~/Downloads/Replication Files/Wordscores/")

austria_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/austria_wordscores.csv", row.names=1)
belgiumfr_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/belgiumfr_wordscores.csv", row.names=1)
belgiumnl_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/belgiumnl_wordscores.csv", row.names=1)
cyprus_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/cyprus_wordscores.csv", row.names=1)
czech_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/czech_wordscores.csv", row.names=1)
denmark_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/denmark_wordscores.csv", row.names=1)
estonia_wordscores <- read.csv("~/Dropbox/Wordscores/Replication Files/Wordscores/estonia_wordscores.csv", row.names=1)
# Read the saved Wordscores estimates for each remaining country into an object
# named <country>_wordscores. The first CSV column supplies the row names.
wordscores_csv_dir <- "~/Dropbox/Wordscores/Replication Files/Wordscores/"
for (wordscores_country in c(
  "finland", "france", "germany", "greece", "hungary", "ireland",
  "italy", "latvia", "lithuania", "netherlands", "poland", "portugal",
  "slovakia", "slovenia", "spain", "sweden",
  "greatbritain", "northernireland"
)) {
  assign(
    paste0(wordscores_country, "_wordscores"),
    read.csv(
      paste0(wordscores_csv_dir, wordscores_country, "_wordscores.csv"),
      row.names = 1
    )
  )
}
rm(wordscores_csv_dir, wordscores_country)

# Country label attached to every row of each <country>_wordscores data frame.
# The order of this vector is also the order in which the frames are stacked.
wordscores_country_codes <- c(
  austria = "AT", belgiumfr = "BE(FR)", belgiumnl = "BE(NL)",
  cyprus = "CY", czech = "CZ", denmark = "DK", estonia = "EE",
  finland = "FI", france = "FR", germany = "DE", greece = "GR",
  hungary = "HU", ireland = "IE", italy = "IT", latvia = "LV",
  lithuania = "LT", netherlands = "NL", poland = "PL", portugal = "PT",
  slovakia = "SK", slovenia = "SI", spain = "ES", sweden = "SE",
  greatbritain = "GB", northernireland = "NI"
)

for (wordscores_country in names(wordscores_country_codes)) {
  wordscores_object <- paste0(wordscores_country, "_wordscores")
  wordscores_frame <- get(wordscores_object)
  wordscores_frame$country <- rep(
    wordscores_country_codes[[wordscores_country]],
    nrow(wordscores_frame)
  )
  assign(wordscores_object, wordscores_frame)
}

# Stack all countries. The list is unnamed on purpose: named arguments would
# make rbind() prefix the row names, which are parsed for the party name below.
wordscores <- do.call(
  rbind,
  unname(mget(paste0(names(wordscores_country_codes), "_wordscores")))
)

rm(
  wordscores_country_codes, wordscores_country,
  wordscores_object, wordscores_frame
)

# The row names of the stacked scores are split on "_" and the first piece is
# used as the party name.
party <- vapply(
  strsplit(row.names(wordscores), "_"),
  function(pieces) pieces[1],
  character(1)
)

# The year is not parsed from the row names: rows are assumed to alternate
# 2004, 2009, 2004, 2009, ... The number of pairs is derived from the data
# rather than hard-coded, and an odd row count (which would break the pairing)
# is rejected up front.
stopifnot(nrow(wordscores) %% 2 == 0)
year <- rep(c(2004, 2009), times = nrow(wordscores) / 2)

wordscores <- cbind(party, year, wordscores)
rownames(wordscores) <- NULL

# Keep only the 2009 rows, then drop the now-constant year column
wordscores <- wordscores[year == 2009, ]
wordscores$year <- NULL

# Move the country column to the front
country <- wordscores$country
wordscores$country <- NULL
wordscores <- cbind(country, wordscores)

setwd("~/Downloads/Replication Files/")
write.csv(
  wordscores,
  file = "wordscores_unstandardized.csv",
  row.names = FALSE
)

# Add Benchmarks

# Read the scores back from disk and join them to the benchmark positions on
# country and party.
wordscores_unstandardized <- read.csv("wordscores_unstandardized.csv")
benchmarks <- read.csv("benchmarks.csv")
analysis <- merge(
  x = wordscores_unstandardized,
  y = benchmarks,
  by = c("country", "party")
)

# Reorder the columns: the two merge keys, then column 27, then columns 3-26,
# then columns 28-39.
analysis_column_order <- c(1:2, 27, 3:26, 28:39)
analysis <- analysis[, analysis_column_order]
rm(analysis_column_order)

# Pearson correlations (with 95% confidence intervals) between every series of
# Wordscores estimates and every benchmark, for the dimensions LR, EU, EC, SO.
#
# Estimate columns in `analysis` are named <reference>_<dimension>_<rescaling>
# (e.g. bl_lr_lbg); benchmark columns are named <benchmark>_<dimension>_<year>
# (e.g. ches_lr_2010), where the CHES columns carry the suffix 2010 and the
# EMP and EUP columns the suffix 2009.

# One row per correlation. expand.grid() varies its first argument fastest, so
# rows are ordered by dimension, then reference, then rescaling, then benchmark.
pearson_design <- expand.grid(
  benchmark = c("CHES", "EMP", "EUP"),
  rescaling = c("LBG", "MV"),
  reference = c("BL", "CHES", "EMP"),
  dimension = c("LR", "EU", "EC", "SO"),
  stringsAsFactors = FALSE
)

pearson_estimate_cols <- tolower(paste(
  pearson_design$reference,
  pearson_design$dimension,
  pearson_design$rescaling,
  sep = "_"
))
pearson_benchmark_cols <- paste(
  tolower(pearson_design$benchmark),
  tolower(pearson_design$dimension),
  ifelse(pearson_design$benchmark == "CHES", "2010", "2009"),
  sep = "_"
)

# Fail with a readable message rather than deep inside cor.test()
pearson_missing_cols <- setdiff(
  c(pearson_estimate_cols, pearson_benchmark_cols),
  names(analysis)
)
if (length(pearson_missing_cols) > 0) {
  stop(
    "Columns missing from `analysis`: ",
    paste(pearson_missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Run each test once and keep the estimate plus both interval bounds as
# numbers (a 3 x n matrix: estimate, lower, upper).
pearson_stats <- vapply(
  seq_len(nrow(pearson_design)),
  function(row) {
    test <- cor.test(
      analysis[[pearson_estimate_cols[row]]],
      analysis[[pearson_benchmark_cols[row]]],
      method = "pearson",
      conf.level = 0.95
    )
    c(unname(test$estimate), test$conf.int[1:2])
  },
  numeric(3)
)

pearson <- data.frame(
  pearson = pearson_stats[1, ],
  pearson_lower = pearson_stats[2, ],
  pearson_upper = pearson_stats[3, ],
  dimension = as.factor(pearson_design$dimension),
  reference = as.factor(pearson_design$reference),
  benchmark = as.factor(pearson_design$benchmark),
  rescaling = as.factor(pearson_design$rescaling)
)

# Combine with the concordance results in `ccc` and store the table on disk
correlation <- merge(ccc,pearson, by=c("dimension","reference","benchmark","rescaling"))
write.csv(correlation, file= "correlation.csv", row.names = FALSE)
rm(list = ls())

## Graphs

setwd("~/Downloads/Replication Files/")

correlation <- read.csv("~/Downloads/Replication Files/correlation.csv")
benchmarks <- read.csv("~/Downloads/Replication Files/benchmarks.csv")

# Agreement between pairs of benchmarks, per dimension. These values are drawn
# as vertical reference lines in the plots below.

# Correlation estimate from cor.test() with its default settings
benchmark_pair_cor <- function(first, second) {
  as.numeric(cor.test(first, second)$estimate)
}

# First element of the `rho.c` component returned by epi.ccc()
# (the point estimate, per the epiR documentation)
benchmark_pair_ccc <- function(first, second) {
  concordance <- epi.ccc(first, second, ci = "z-transform", conf.level = 0.95)
  as.numeric(concordance$rho.c[1])
}

lr_ches_emp_cor <- benchmark_pair_cor(benchmarks$ches_lr_2010, benchmarks$emp_lr_2009)
lr_ches_eup_cor <- benchmark_pair_cor(benchmarks$ches_lr_2010, benchmarks$eup_lr_2009)
lr_emp_eup_cor <- benchmark_pair_cor(benchmarks$emp_lr_2009, benchmarks$eup_lr_2009)
eu_ches_emp_cor <- benchmark_pair_cor(benchmarks$ches_eu_2010, benchmarks$emp_eu_2009)
eu_ches_eup_cor <- benchmark_pair_cor(benchmarks$ches_eu_2010, benchmarks$eup_eu_2009)
eu_emp_eup_cor <- benchmark_pair_cor(benchmarks$emp_eu_2009, benchmarks$eup_eu_2009)
ec_ches_emp_cor <- benchmark_pair_cor(benchmarks$ches_ec_2010, benchmarks$emp_ec_2009)
ec_ches_eup_cor <- benchmark_pair_cor(benchmarks$ches_ec_2010, benchmarks$eup_ec_2009)
ec_emp_eup_cor <- benchmark_pair_cor(benchmarks$emp_ec_2009, benchmarks$eup_ec_2009)
so_ches_emp_cor <- benchmark_pair_cor(benchmarks$ches_so_2010, benchmarks$emp_so_2009)
so_ches_eup_cor <- benchmark_pair_cor(benchmarks$ches_so_2010, benchmarks$eup_so_2009)
so_emp_eup_cor <- benchmark_pair_cor(benchmarks$emp_so_2009, benchmarks$eup_so_2009)

lr_ches_emp_ccc <- benchmark_pair_ccc(benchmarks$ches_lr_2010, benchmarks$emp_lr_2009)
lr_ches_eup_ccc <- benchmark_pair_ccc(benchmarks$ches_lr_2010, benchmarks$eup_lr_2009)
lr_emp_eup_ccc <- benchmark_pair_ccc(benchmarks$emp_lr_2009, benchmarks$eup_lr_2009)
eu_ches_emp_ccc <- benchmark_pair_ccc(benchmarks$ches_eu_2010, benchmarks$emp_eu_2009)
eu_ches_eup_ccc <- benchmark_pair_ccc(benchmarks$ches_eu_2010, benchmarks$eup_eu_2009)
eu_emp_eup_ccc <- benchmark_pair_ccc(benchmarks$emp_eu_2009, benchmarks$eup_eu_2009)
ec_ches_emp_ccc <- benchmark_pair_ccc(benchmarks$ches_ec_2010, benchmarks$emp_ec_2009)
ec_ches_eup_ccc <- benchmark_pair_ccc(benchmarks$ches_ec_2010, benchmarks$eup_ec_2009)
ec_emp_eup_ccc <- benchmark_pair_ccc(benchmarks$emp_ec_2009, benchmarks$eup_ec_2009)
so_ches_emp_ccc <- benchmark_pair_ccc(benchmarks$ches_so_2010, benchmarks$emp_so_2009)
so_ches_eup_ccc <- benchmark_pair_ccc(benchmarks$ches_so_2010, benchmarks$eup_so_2009)
so_emp_eup_ccc <- benchmark_pair_ccc(benchmarks$emp_so_2009, benchmarks$eup_so_2009)

rm(benchmark_pair_cor, benchmark_pair_ccc)

# Combine reference and benchmark into one "<reference>-<benchmark>" label,
# put it in the first column as a factor, and drop the two source columns.
refben <- stri_join(
  as.character(correlation$reference),
  as.character(correlation$benchmark),
  sep = "-"
)
correlation <- cbind(
  refben,
  correlation[, !(names(correlation) %in% c("reference", "benchmark"))]
)
correlation$refben <- as.factor(correlation$refben)

# Split the table by rescaling method and put the two sets of Pearson results
# side by side, one row per reference-benchmark pair and dimension.
# Each subset must hold the rows of the rescaling it is named after: the
# columns are renamed *_lbg / *_mv below, and those names end up as the
# "Pearson-LBG" / "Pearson-MV" labels in the plots.
lbg <- correlation[correlation$rescaling == "LBG",]
mv <- correlation[correlation$rescaling == "MV",]
lbg$rescaling <- NULL
mv$rescaling <- NULL

# After dropping `rescaling`, columns 6-8 are the Pearson estimate and bounds
colnames(lbg)[6:8] <- c("pearson_lbg", "pearson_lower_lbg", "pearson_upper_lbg")
colnames(mv)[6:8] <- c("pearson_mv", "pearson_lower_mv", "pearson_upper_mv")

correlation <- merge(lbg, mv, by = c("refben","dimension"))

# Both inputs carry ccc columns, so merge() suffixes them .x (from `lbg`) and
# .y (from `mv`). Keep the .x set and restore the plain names.
correlation$ccc.y <- NULL
correlation$ccc_lower.y <- NULL
correlation$ccc_upper.y <- NULL
colnames(correlation)[3:5] <- c("ccc", "ccc_lower", "ccc_upper")

# One five-column table per measure: keys plus estimate, lower, upper
correlation_ccc <- correlation[,c(1:2,3:5)]
correlation_pearson_lbg <- correlation[,c(1:2,6:8)]
correlation_pearson_mv <- correlation[,c(1:2,9:11)]

rm(benchmarks,refben,lbg,mv,correlation)

# Bring the three per-measure tables to a common layout (value, lower, upper,
# plus a `type` label) and stack them into one long table.
label_measure <- function(estimates, label) {
  estimates$type <- label
  colnames(estimates)[3:5] <- c("value", "lower", "upper")
  estimates
}

correlation <- rbind(
  label_measure(correlation_ccc, "CCC"),
  label_measure(correlation_pearson_lbg, "Pearson-LBG"),
  label_measure(correlation_pearson_mv, "Pearson-MV")
)
correlation$type <- as.factor(correlation$type)

rm(
  correlation_ccc, correlation_pearson_lbg, correlation_pearson_mv,
  label_measure
)

# One table per dimension, without the (then constant) dimension column
columns_without_dimension <- setdiff(names(correlation), "dimension")
lr <- correlation[correlation$dimension == "LR", columns_without_dimension]
eu <- correlation[correlation$dimension == "EU", columns_without_dimension]
ec <- correlation[correlation$dimension == "EC", columns_without_dimension]
so <- correlation[correlation$dimension == "SO", columns_without_dimension]
rm(columns_without_dimension)

#LR

# Rows for each agreement measure on the LR dimension
lr_ccc <- lr[lr$type == "CCC", ]
lr_pearsonlbg <- lr[lr$type == "Pearson-LBG", ]
lr_pearsonmv <- lr[lr$type == "Pearson-MV", ]

# Components shared by the three LR panels. They are added in the same order
# in every panel, because layers are drawn in the order they are added.

# Interval bars with the point estimate on top
lr_estimate_layers <- list(
  geom_errorbarh(
    mapping = aes(y = refben, xmin = lower, xmax = upper),
    height = 0.1, size = 0.2
  ),
  geom_point(shape = 21, size = 3)
)

# Benchmark-vs-benchmark reference lines: solid = CHES/EMP, dashed = CHES/EUP,
# dotted = EMP/EUP
lr_ccc_reference_lines <- list(
  geom_vline(xintercept = lr_ches_emp_ccc, linetype = "solid"),
  geom_vline(xintercept = lr_ches_eup_ccc, linetype = "dashed"),
  geom_vline(xintercept = lr_emp_eup_ccc, linetype = "dotted")
)
lr_cor_reference_lines <- list(
  geom_vline(xintercept = lr_ches_emp_cor, linetype = "solid"),
  geom_vline(xintercept = lr_ches_eup_cor, linetype = "dashed"),
  geom_vline(xintercept = lr_emp_eup_cor, linetype = "dotted")
)

# Grey zero line, x axis fixed to [0, 1], axis titles
lr_axis_layers <- list(
  geom_vline(xintercept = 0, linetype = "solid", colour = "gray"),
  scale_x_continuous(
    limits = c(0, 1),
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1")
  ),
  xlab("Value"),
  ylab("Reference-Benchmark")
)

# The Pearson panels drop the whole y axis (title, labels, ticks)
lr_no_y_axis <- theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)

lr_ccc_plot <- ggplot(lr_ccc, aes(x = value, y = refben)) +
  lr_estimate_layers +
  lr_ccc_reference_lines +
  lr_axis_layers +
  ggtitle("CCC") +
  theme_classic() +
  theme(axis.title.x = element_blank())

lr_pearsonlbg_plot <- ggplot(lr_pearsonlbg, aes(x = value, y = refben)) +
  lr_estimate_layers +
  lr_cor_reference_lines +
  lr_axis_layers +
  ggtitle("Pearson-LBG") +
  theme_classic() +
  lr_no_y_axis

lr_pearsonmv_plot <- ggplot(lr_pearsonmv, aes(x = value, y = refben)) +
  lr_estimate_layers +
  lr_cor_reference_lines +
  lr_axis_layers +
  ggtitle("Pearson-MV") +
  theme_classic() +
  lr_no_y_axis

rm(
  lr_estimate_layers, lr_ccc_reference_lines, lr_cor_reference_lines,
  lr_axis_layers, lr_no_y_axis
)

#EU

# Rows for each agreement measure on the EU dimension
eu_ccc <- eu[eu$type == "CCC", ]
eu_pearsonlbg <- eu[eu$type == "Pearson-LBG", ]
eu_pearsonmv <- eu[eu$type == "Pearson-MV", ]

# Components shared by the three EU panels. They are added in the same order
# in every panel, because layers are drawn in the order they are added.

# Interval bars with the point estimate on top
eu_estimate_layers <- list(
  geom_errorbarh(
    mapping = aes(y = refben, xmin = lower, xmax = upper),
    height = 0.1, size = 0.2
  ),
  geom_point(shape = 21, size = 3)
)

# Benchmark-vs-benchmark reference lines: solid = CHES/EMP, dashed = CHES/EUP,
# dotted = EMP/EUP
eu_ccc_reference_lines <- list(
  geom_vline(xintercept = eu_ches_emp_ccc, linetype = "solid"),
  geom_vline(xintercept = eu_ches_eup_ccc, linetype = "dashed"),
  geom_vline(xintercept = eu_emp_eup_ccc, linetype = "dotted")
)
eu_cor_reference_lines <- list(
  geom_vline(xintercept = eu_ches_emp_cor, linetype = "solid"),
  geom_vline(xintercept = eu_ches_eup_cor, linetype = "dashed"),
  geom_vline(xintercept = eu_emp_eup_cor, linetype = "dotted")
)

# Grey zero line, x axis fixed to [-0.25, 1], axis titles
eu_axis_layers <- list(
  geom_vline(xintercept = 0, linetype = "solid", colour = "gray"),
  scale_x_continuous(
    limits = c(-0.25, 1),
    breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1),
    labels = c("-0.25", "0", "0.25", "0.5", "0.75", "1")
  ),
  xlab("Value"),
  ylab("Reference-Benchmark")
)

# The Pearson panels drop the whole y axis (title, labels, ticks)
eu_no_y_axis <- theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)

eu_ccc_plot <- ggplot(eu_ccc, aes(x = value, y = refben)) +
  eu_estimate_layers +
  eu_ccc_reference_lines +
  eu_axis_layers +
  ggtitle("CCC") +
  theme_classic() +
  theme(axis.title.x = element_blank())

eu_pearsonlbg_plot <- ggplot(eu_pearsonlbg, aes(x = value, y = refben)) +
  eu_estimate_layers +
  eu_cor_reference_lines +
  eu_axis_layers +
  ggtitle("Pearson-LBG") +
  theme_classic() +
  eu_no_y_axis

eu_pearsonmv_plot <- ggplot(eu_pearsonmv, aes(x = value, y = refben)) +
  eu_estimate_layers +
  eu_cor_reference_lines +
  eu_axis_layers +
  ggtitle("Pearson-MV") +
  theme_classic() +
  eu_no_y_axis

rm(
  eu_estimate_layers, eu_ccc_reference_lines, eu_cor_reference_lines,
  eu_axis_layers, eu_no_y_axis
)

#EC

# Split the EC results by agreement measure
ec_ccc <- ec[(ec$type == "CCC"), ]
ec_pearsonlbg <- ec[(ec$type == "Pearson-LBG"), ]
ec_pearsonmv <- ec[(ec$type == "Pearson-MV"), ]

# Build one panel of a reference-benchmark agreement figure.
#
# estimates   data frame with columns value, lower, upper and refben
# solid_at, dashed_at, dotted_at
#             x positions of the three vertical reference lines
# panel_title title shown above the panel
# hide_y_axis if TRUE, the y-axis title, labels and ticks are removed
#
# Returns a ggplot object: one point with a horizontal interval per row of
# `estimates`, the three reference lines, and a gray line at 0.
agreement_panel <- function(estimates, solid_at, dashed_at, dotted_at,
                            panel_title, hide_y_axis = TRUE) {
  panel <- ggplot(estimates, aes(x = value, y = refben)) +
    geom_errorbarh(height = .1, size = 0.2,
                   aes(y = refben, xmin = lower, xmax = upper)) +
    geom_point(shape = 21, size = 3) +
    geom_vline(xintercept = solid_at, linetype = "solid") +
    geom_vline(xintercept = dashed_at, linetype = "dashed") +
    geom_vline(xintercept = dotted_at, linetype = "dotted") +
    geom_vline(xintercept = 0, linetype = "solid", colour = "gray") +
    # `labels` is spelled out in full instead of relying on partial matching
    scale_x_continuous(limits = c(-0.25, 1),
                       breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1),
                       labels = c("-0.25", "0", "0.25", "0.5", "0.75", "1")) +
    xlab("Value") +
    ylab("Reference-Benchmark") +
    ggtitle(panel_title) +
    theme_classic()

  if (hide_y_axis) {
    panel + theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
                  axis.text.y = element_blank(), axis.ticks.y = element_blank())
  } else {
    panel + theme(axis.title.x = element_blank())
  }
}

# The CCC panel keeps its y axis; the two Pearson panels drop it
ec_ccc_plot <- agreement_panel(ec_ccc, ec_ches_emp_ccc, ec_ches_eup_ccc,
                               ec_emp_eup_ccc, "CCC", hide_y_axis = FALSE)

ec_pearsonlbg_plot <- agreement_panel(ec_pearsonlbg, ec_ches_emp_cor, ec_ches_eup_cor,
                                      ec_emp_eup_cor, "Pearson-LBG")

ec_pearsonmv_plot <- agreement_panel(ec_pearsonmv, ec_ches_emp_cor, ec_ches_eup_cor,
                                     ec_emp_eup_cor, "Pearson-MV")

#SO

# Split the SO results by agreement measure
so_ccc <- so[(so$type == "CCC"), ]
so_pearsonlbg <- so[(so$type == "Pearson-LBG"), ]
so_pearsonmv <- so[(so$type == "Pearson-MV"), ]

# Build one panel of a reference-benchmark agreement figure.
#
# estimates   data frame with columns value, lower, upper and refben
# solid_at, dashed_at, dotted_at
#             x positions of the three vertical reference lines
# panel_title title shown above the panel
# hide_y_axis if TRUE, the y-axis title, labels and ticks are removed
#
# Returns a ggplot object: one point with a horizontal interval per row of
# `estimates`, the three reference lines, and a gray line at 0.
agreement_panel <- function(estimates, solid_at, dashed_at, dotted_at,
                            panel_title, hide_y_axis = TRUE) {
  panel <- ggplot(estimates, aes(x = value, y = refben)) +
    geom_errorbarh(height = .1, size = 0.2,
                   aes(y = refben, xmin = lower, xmax = upper)) +
    geom_point(shape = 21, size = 3) +
    geom_vline(xintercept = solid_at, linetype = "solid") +
    geom_vline(xintercept = dashed_at, linetype = "dashed") +
    geom_vline(xintercept = dotted_at, linetype = "dotted") +
    geom_vline(xintercept = 0, linetype = "solid", colour = "gray") +
    # `labels` is spelled out in full instead of relying on partial matching
    scale_x_continuous(limits = c(-0.25, 1),
                       breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1),
                       labels = c("-0.25", "0", "0.25", "0.5", "0.75", "1")) +
    xlab("Value") +
    ylab("Reference-Benchmark") +
    ggtitle(panel_title) +
    theme_classic()

  if (hide_y_axis) {
    panel + theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
                  axis.text.y = element_blank(), axis.ticks.y = element_blank())
  } else {
    panel + theme(axis.title.x = element_blank())
  }
}

# The CCC panel keeps its y axis; the two Pearson panels drop it
so_ccc_plot <- agreement_panel(so_ccc, so_ches_emp_ccc, so_ches_eup_ccc,
                               so_emp_eup_ccc, "CCC", hide_y_axis = FALSE)

so_pearsonlbg_plot <- agreement_panel(so_pearsonlbg, so_ches_emp_cor, so_ches_eup_cor,
                                      so_emp_eup_cor, "Pearson-LBG")

so_pearsonmv_plot <- agreement_panel(so_pearsonmv, so_ches_emp_cor, so_ches_eup_cor,
                                     so_emp_eup_cor, "Pearson-MV")


# Each figure is a single row of three panels (CCC, Pearson-LBG, Pearson-MV);
# the first panel is given 1.5 times the width of the other two.
arrange_three_panels <- function(...) {
  ggarrange(..., ncol = 3, nrow = 1, widths = c(1.5, 1, 1))
}

Figure3.pdf <- arrange_three_panels(lr_ccc_plot, lr_pearsonlbg_plot, lr_pearsonmv_plot)
Figure4.pdf <- arrange_three_panels(eu_ccc_plot, eu_pearsonlbg_plot, eu_pearsonmv_plot)
Figure5.pdf <- arrange_three_panels(ec_ccc_plot, ec_pearsonlbg_plot, ec_pearsonmv_plot)
Figure6.pdf <- arrange_three_panels(so_ccc_plot, so_pearsonlbg_plot, so_pearsonmv_plot)

# Clear the workspace before the next section
rm(list = ls())

###################################################################### Table 2 & Figure 7 ####################################################################

# Unload all packages as mlogit can have problems with interfering packages. Some packages might not be detached - this is (with the packages loaded earlier) not a problem, however.

# Detach every non-base attached package, one at a time. Each detach is
# wrapped in tryCatch so that a package which cannot be detached (for example
# because another attached package requires it) is reported and skipped
# instead of aborting the script. When no extra packages are attached the
# vector of names is empty and nothing is done.
invisible(lapply(names(sessionInfo()$otherPkgs), function(pkg_name) {
  tryCatch(
    detach(paste0("package:", pkg_name), character.only = TRUE, unload = TRUE),
    error = function(e) {
      message("Could not detach ", pkg_name, ": ", conditionMessage(e))
    }
  )
}))

library(mlogit)
library(mnlogit)
library(berryFunctions)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(irr)
library(xtable)

setwd("~/Downloads/Replication Files/")

# Join the standardized wordscores to the benchmarks, one row per
# country-party combination present in both files
analysis <- merge(
  read.csv("wordscores_standardized.csv"),
  read.csv("benchmarks.csv"),
  by = c("country", "party")
)

# Move column 27 (selected as "epgroup" by the model blocks below) to position 3,
# directly after country and party; the remaining columns keep their order
analysis <- analysis[, c(1, 2, 27, 3:26, 28:39)]

# Plain character copies of the identifying columns, used when the
# predictions are collated
country <- as.character(analysis[["country"]])
party <- as.character(analysis[["party"]])
epgroup <- as.character(analysis[["epgroup"]])

### BL LBG

# Outcome (column 3, epgroup) plus the three BL-LBG score columns
analysis_bl_lbg <- analysis[, c(3, 5:7)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_bl_lbg)[!complete.cases(analysis_bl_lbg)])

analysis_bl_lbg <- mlogit.data(analysis_bl_lbg, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_bl_lbg_aic <- mnlogit(epgroup ~ 1 | bl_eu_lbg + bl_ec_lbg + bl_so_lbg,
                               analysis_bl_lbg, na.rm = TRUE)
bl_lbg_fit <- mlogit(epgroup ~ 1 | bl_eu_lbg + bl_ec_lbg + bl_so_lbg, analysis_bl_lbg)

# Predicted group = column name with the highest probability in each row
bl_lbg_probs <- as.data.frame(predict(analysis_bl_lbg_aic))
bl_lbg_labels <- data.frame(
  label = colnames(bl_lbg_probs)[apply(bl_lbg_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
bl_lbg_labels <- insertRows(bl_lbg_labels, r = na)
analysis_bl_lbg_predict <- as.character(addNA(bl_lbg_labels$label))

analysis_bl_lbg_mcfadden <- as.numeric(summary(bl_lbg_fit)$mfR2)

rm(na, analysis_bl_lbg, bl_lbg_fit, bl_lbg_probs, bl_lbg_labels)


# CHES LBG

# Outcome (column 3, epgroup) plus the three CHES-LBG score columns
analysis_ches_lbg <- analysis[, c(3, 9:11)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_ches_lbg)[!complete.cases(analysis_ches_lbg)])

analysis_ches_lbg <- mlogit.data(analysis_ches_lbg, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_ches_lbg_aic <- mnlogit(epgroup ~ 1 | ches_eu_lbg + ches_ec_lbg + ches_so_lbg,
                                 analysis_ches_lbg, na.rm = TRUE)
ches_lbg_fit <- mlogit(epgroup ~ 1 | ches_eu_lbg + ches_ec_lbg + ches_so_lbg, analysis_ches_lbg)

# Predicted group = column name with the highest probability in each row
ches_lbg_probs <- as.data.frame(predict(analysis_ches_lbg_aic))
ches_lbg_labels <- data.frame(
  label = colnames(ches_lbg_probs)[apply(ches_lbg_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
ches_lbg_labels <- insertRows(ches_lbg_labels, r = na)
analysis_ches_lbg_predict <- as.character(addNA(ches_lbg_labels$label))

analysis_ches_lbg_mcfadden <- as.numeric(summary(ches_lbg_fit)$mfR2)

rm(na, analysis_ches_lbg, ches_lbg_fit, ches_lbg_probs, ches_lbg_labels)


# EMP LBG

# Outcome (column 3, epgroup) plus the three EMP-LBG score columns
analysis_emp_lbg <- analysis[, c(3, 13:15)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_emp_lbg)[!complete.cases(analysis_emp_lbg)])

analysis_emp_lbg <- mlogit.data(analysis_emp_lbg, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_emp_lbg_aic <- mnlogit(epgroup ~ 1 | emp_eu_lbg + emp_ec_lbg + emp_so_lbg,
                                analysis_emp_lbg, na.rm = TRUE)
emp_lbg_fit <- mlogit(epgroup ~ 1 | emp_eu_lbg + emp_ec_lbg + emp_so_lbg, analysis_emp_lbg)

# Predicted group = column name with the highest probability in each row
emp_lbg_probs <- as.data.frame(predict(analysis_emp_lbg_aic))
emp_lbg_labels <- data.frame(
  label = colnames(emp_lbg_probs)[apply(emp_lbg_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
emp_lbg_labels <- insertRows(emp_lbg_labels, r = na)
analysis_emp_lbg_predict <- as.character(addNA(emp_lbg_labels$label))

analysis_emp_lbg_mcfadden <- as.numeric(summary(emp_lbg_fit)$mfR2)

rm(na, analysis_emp_lbg, emp_lbg_fit, emp_lbg_probs, emp_lbg_labels)

### BL MV

# Outcome (column 3, epgroup) plus the three BL-MV score columns
analysis_bl_mv <- analysis[, c(3, 17:19)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_bl_mv)[!complete.cases(analysis_bl_mv)])

analysis_bl_mv <- mlogit.data(analysis_bl_mv, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_bl_mv_aic <- mnlogit(epgroup ~ 1 | bl_eu_mv + bl_ec_mv + bl_so_mv,
                              analysis_bl_mv, na.rm = TRUE)
bl_mv_fit <- mlogit(epgroup ~ 1 | bl_eu_mv + bl_ec_mv + bl_so_mv, analysis_bl_mv)

# Predicted group = column name with the highest probability in each row
bl_mv_probs <- as.data.frame(predict(analysis_bl_mv_aic))
bl_mv_labels <- data.frame(
  label = colnames(bl_mv_probs)[apply(bl_mv_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
bl_mv_labels <- insertRows(bl_mv_labels, r = na)
analysis_bl_mv_predict <- as.character(addNA(bl_mv_labels$label))

analysis_bl_mv_mcfadden <- as.numeric(summary(bl_mv_fit)$mfR2)

rm(na, analysis_bl_mv, bl_mv_fit, bl_mv_probs, bl_mv_labels)


# CHES MV

# Outcome (column 3, epgroup) plus the three CHES-MV score columns
analysis_ches_mv <- analysis[, c(3, 21:23)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_ches_mv)[!complete.cases(analysis_ches_mv)])

analysis_ches_mv <- mlogit.data(analysis_ches_mv, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_ches_mv_aic <- mnlogit(epgroup ~ 1 | ches_eu_mv + ches_ec_mv + ches_so_mv,
                                analysis_ches_mv, na.rm = TRUE)
ches_mv_fit <- mlogit(epgroup ~ 1 | ches_eu_mv + ches_ec_mv + ches_so_mv, analysis_ches_mv)

# Predicted group = column name with the highest probability in each row
ches_mv_probs <- as.data.frame(predict(analysis_ches_mv_aic))
ches_mv_labels <- data.frame(
  label = colnames(ches_mv_probs)[apply(ches_mv_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
ches_mv_labels <- insertRows(ches_mv_labels, r = na)
analysis_ches_mv_predict <- as.character(addNA(ches_mv_labels$label))

analysis_ches_mv_mcfadden <- as.numeric(summary(ches_mv_fit)$mfR2)

rm(na, analysis_ches_mv, ches_mv_fit, ches_mv_probs, ches_mv_labels)


# EMP MV 

# Outcome (column 3, epgroup) plus the three EMP-MV score columns
analysis_emp_mv <- analysis[,c(3,25:27)]
# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_emp_mv[!complete.cases(analysis_emp_mv),]))
analysis_emp_mv <- mlogit.data(analysis_emp_mv, shape="wide", choice="epgroup")
# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_emp_mv_aic <- mnlogit(epgroup ~ 1|emp_eu_mv + emp_ec_mv + emp_so_mv, analysis_emp_mv, na.rm=TRUE)
analysis_emp_mv_mcfadden <- mlogit(epgroup ~ 1|emp_eu_mv + emp_ec_mv + emp_so_mv, analysis_emp_mv)
# Predicted group = column name with the highest probability in each row
analysis_emp_mv_predict <- as.data.frame(predict(analysis_emp_mv_aic))
analysis_emp_mv_predict$predict <- colnames(analysis_emp_mv_predict)[apply(analysis_emp_mv_predict,1,which.max)]
analysis_emp_mv_predict <- analysis_emp_mv_predict$predict
analysis_emp_mv_predict <- as.data.frame(analysis_emp_mv_predict)
# Like the other model blocks, put NA rows back where input rows were
# incomplete so the vector stays aligned with country/party when the
# predictions are collated. Guarded so that nothing changes when every row
# is complete.
if (length(na) > 0) {
  analysis_emp_mv_predict <- insertRows(analysis_emp_mv_predict, r=na)
}
analysis_emp_mv_predict <- analysis_emp_mv_predict$analysis_emp_mv_predict
analysis_emp_mv_predict <- addNA(analysis_emp_mv_predict)
analysis_emp_mv_predict <- as.character(analysis_emp_mv_predict)
analysis_emp_mv_mcfadden <- as.numeric(summary(analysis_emp_mv_mcfadden)$mfR2)

rm(na,analysis_emp_mv)


# CHES 2010

# Outcome (column 3, epgroup) plus the three CHES 2010 benchmark columns
analysis_ches_2010 <- analysis[, c(3, 29:31)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_ches_2010)[!complete.cases(analysis_ches_2010)])

analysis_ches_2010 <- mlogit.data(analysis_ches_2010, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_ches_2010_aic <- mnlogit(epgroup ~ 1 | ches_eu_2010 + ches_ec_2010 + ches_so_2010,
                                  analysis_ches_2010, na.rm = TRUE)
ches_2010_fit <- mlogit(epgroup ~ 1 | ches_eu_2010 + ches_ec_2010 + ches_so_2010, analysis_ches_2010)

# Predicted group = column name with the highest probability in each row
ches_2010_probs <- as.data.frame(predict(analysis_ches_2010_aic))
ches_2010_labels <- data.frame(
  label = colnames(ches_2010_probs)[apply(ches_2010_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
ches_2010_labels <- insertRows(ches_2010_labels, r = na)
analysis_ches_2010_predict <- as.character(addNA(ches_2010_labels$label))

analysis_ches_2010_mcfadden <- as.numeric(summary(ches_2010_fit)$mfR2)

rm(na, analysis_ches_2010, ches_2010_fit, ches_2010_probs, ches_2010_labels)


# EMP 2009

# Outcome (column 3, epgroup) plus the three EMP 2009 benchmark columns
analysis_emp_2009 <- analysis[, c(3, 33:35)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_emp_2009)[!complete.cases(analysis_emp_2009)])

analysis_emp_2009 <- mlogit.data(analysis_emp_2009, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_emp_2009_aic <- mnlogit(epgroup ~ 1 | emp_eu_2009 + emp_ec_2009 + emp_so_2009,
                                 analysis_emp_2009, na.rm = TRUE)
emp_2009_fit <- mlogit(epgroup ~ 1 | emp_eu_2009 + emp_ec_2009 + emp_so_2009, analysis_emp_2009)

# Predicted group = column name with the highest probability in each row
emp_2009_probs <- as.data.frame(predict(analysis_emp_2009_aic))
emp_2009_labels <- data.frame(
  label = colnames(emp_2009_probs)[apply(emp_2009_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
emp_2009_labels <- insertRows(emp_2009_labels, r = na)
analysis_emp_2009_predict <- as.character(addNA(emp_2009_labels$label))

analysis_emp_2009_mcfadden <- as.numeric(summary(emp_2009_fit)$mfR2)

rm(na, analysis_emp_2009, emp_2009_fit, emp_2009_probs, emp_2009_labels)

# EUP 2009

# Outcome (column 3, epgroup) plus the three EUP 2009 benchmark columns
analysis_eup_2009 <- analysis[, c(3, 37:39)]

# Row numbers with at least one missing value in these columns
na <- as.numeric(rownames(analysis_eup_2009)[!complete.cases(analysis_eup_2009)])

analysis_eup_2009 <- mlogit.data(analysis_eup_2009, shape = "wide", choice = "epgroup")

# The same multinomial logit is fitted twice: the mnlogit fit supplies the
# predicted probabilities, the mlogit fit supplies McFadden's R2
analysis_eup_2009_aic <- mnlogit(epgroup ~ 1 | eup_eu_2009 + eup_ec_2009 + eup_so_2009,
                                 analysis_eup_2009, na.rm = TRUE)
eup_2009_fit <- mlogit(epgroup ~ 1 | eup_eu_2009 + eup_ec_2009 + eup_so_2009, analysis_eup_2009)

# Predicted group = column name with the highest probability in each row
eup_2009_probs <- as.data.frame(predict(analysis_eup_2009_aic))
eup_2009_labels <- data.frame(
  label = colnames(eup_2009_probs)[apply(eup_2009_probs, 1, which.max)]
)

# Re-insert NA rows at the positions recorded in `na`, then flatten the
# result to a character vector
eup_2009_labels <- insertRows(eup_2009_labels, r = na)
analysis_eup_2009_predict <- as.character(addNA(eup_2009_labels$label))

analysis_eup_2009_mcfadden <- as.numeric(summary(eup_2009_fit)$mfR2)

rm(na, analysis_eup_2009, eup_2009_fit, eup_2009_probs, eup_2009_labels)

# Collate Scores

# One row per party: identifiers, actual EP group, and the group predicted by
# each of the nine models. Columns are named directly in the cbind call.
predict_parties <- as.data.frame(cbind(
  country = country,
  party = party,
  epgroup = epgroup,
  bl_lbg = analysis_bl_lbg_predict,
  bl_mv = analysis_bl_mv_predict,
  ches_lbg = analysis_ches_lbg_predict,
  ches_mv = analysis_ches_mv_predict,
  emp_lbg = analysis_emp_lbg_predict,
  emp_mv = analysis_emp_mv_predict,
  ches_2010 = analysis_ches_2010_predict,
  emp_2009 = analysis_emp_2009_predict,
  eup_2009 = analysis_eup_2009_predict
))

# McFadden's R2 per model, in the same order as `type`
type <- c("bl_lbg", "bl_mv", "ches_lbg", "ches_mv", "emp_lbg", "emp_mv",
          "ches_2010", "emp_2009", "eup_2009")
mcfadden <- c(analysis_bl_lbg_mcfadden, analysis_bl_mv_mcfadden,
              analysis_ches_lbg_mcfadden, analysis_ches_mv_mcfadden,
              analysis_emp_lbg_mcfadden, analysis_emp_mv_mcfadden,
              analysis_ches_2010_mcfadden, analysis_emp_2009_mcfadden,
              analysis_eup_2009_mcfadden)
scores <- data.frame(type = type, McFadden = mcfadden, stringsAsFactors = FALSE)

# Write both tables to disk; they are read back in after the workspace is cleared
setwd("~/Downloads/Replication Files/")
write.csv(scores, file = "construct_scores.csv", row.names = FALSE)
write.csv(predict_parties, file = "construct_predict.csv", row.names = FALSE)

rm(list = ls())


## Construct Figures

# Read the predictions with string columns as factors. This is explicit so
# that the script behaves the same under R >= 4.0, where read.csv() no longer
# converts strings to factors by default.
construct_predict <- read.csv("construct_predict.csv", stringsAsFactors = TRUE)

# Give these prediction columns the same eight levels in the same order, so
# that groups which were never predicted still appear in the tables built
# from them. Going through as.character() makes the releveling independent of
# the column's current type and of which levels it already has.
# ches_2010 and emp_2009 are left as read, as before.
ep_group_levels <- c("ALDE", "ECR", "EFD", "EPP", "GREEN EFA", "GUE NGL", "NI", "SD")
for (prediction_col in c("bl_lbg", "bl_mv", "ches_lbg", "ches_mv",
                         "emp_lbg", "emp_mv", "eup_2009")) {
  construct_predict[[prediction_col]] <- factor(
    as.character(construct_predict[[prediction_col]]),
    levels = ep_group_levels
  )
}
rm(ep_group_levels, prediction_col)

# Cross-tabulate the actual EP group (Var1) against one prediction column
# (Var2), reshape the counts to long format, and replace spaces in the group
# labels with line breaks.
confusion_counts <- function(prediction_col) {
  counts <- melt(table(construct_predict$epgroup, construct_predict[[prediction_col]]))
  levels(counts$Var1) <- gsub(" ", "\n", levels(counts$Var1))
  levels(counts$Var2) <- gsub(" ", "\n", levels(counts$Var2))
  counts
}

summary_bl_lbg <- confusion_counts("bl_lbg")
summary_bl_mv <- confusion_counts("bl_mv")
summary_ches_lbg <- confusion_counts("ches_lbg")
summary_ches_mv <- confusion_counts("ches_mv")
summary_emp_lbg <- confusion_counts("emp_lbg")
summary_emp_mv <- confusion_counts("emp_mv")
summary_ches_2010 <- confusion_counts("ches_2010")
summary_emp_2009 <- confusion_counts("emp_2009")
summary_eup_2009 <- confusion_counts("eup_2009")

# Draw one heat map of actual EP group (x) against predicted group (y);
# darker tiles mean higher counts. Extra arguments are passed on to theme().
confusion_heatmap <- function(counts, y_label, ...) {
  ggplot(data = counts, aes(x = Var1, y = Var2)) +
    geom_tile(aes(fill = value)) +
    xlab("EPGroup") +
    ylab(y_label) +
    scale_fill_gradient(high = "black", low = "white") +
    theme_classic() +
    theme(legend.position = "none", ...)
}

# BL LBG (the only panel with enlarged axis text)
bl_lbg_plot <- confusion_heatmap(summary_bl_lbg, "BL - LBG",
                                 axis.text = element_text(size = 12))

# BL MV
bl_mv_plot <- confusion_heatmap(summary_bl_mv, "BL - MV")

# CHES LBG
ches_lbg_plot <- confusion_heatmap(summary_ches_lbg, "CHES - LBG")

# CHES MV
ches_mv_plot <- confusion_heatmap(summary_ches_mv, "CHES - MV")

# EMP LBG
emp_lbg_plot <- confusion_heatmap(summary_emp_lbg, "EMP - LBG")

# EMP MV
emp_mv_plot <- confusion_heatmap(summary_emp_mv, "EMP - MV")

# CHES 2010
ches_2010_plot <- confusion_heatmap(summary_ches_2010, "CHES - 2010")

# EMP 2009
emp_2009_plot <- confusion_heatmap(summary_emp_2009, "EMP - 2009")

# EUP 2009
eup_2009_plot <- confusion_heatmap(summary_eup_2009, "EUP - 2009")

# Panels used for Figure 7
Figure7a <- bl_mv_plot
Figure7b <- eup_2009_plot
Figure7c <- ches_2010_plot


### Krippendorf's Alpha

# Krippendorff's alpha (nominal) between the actual EP group and the group
# predicted by each model.
type <- c("bl_lbg","bl_mv","ches_lbg","ches_mv","emp_lbg","emp_mv","ches_2010","emp_2009","eup_2009")

# Actual and predicted labels are coded against ONE shared label set. Binding
# the columns directly would compare each factor's own integer codes, which
# only agree when both factors happen to have identical levels (and would not
# work at all for character columns).
group_labels <- sort(unique(unlist(
  lapply(construct_predict[c("epgroup", type)], as.character)
)))

# Alpha for one prediction column: a 2 x n matrix with the actual groups in
# the first row and the predicted groups in the second; NA stays NA
alpha_for <- function(prediction_col) {
  ratings <- rbind(
    match(as.character(construct_predict$epgroup), group_labels),
    match(as.character(construct_predict[[prediction_col]]), group_labels)
  )
  kripp.alpha(ratings, method = "nominal")$value
}

k_alpha <- data.frame(
  type = type,
  k_alpha = vapply(type, alpha_for, numeric(1)),
  stringsAsFactors = FALSE
)
rownames(k_alpha) <- NULL

# Add Krippendorff's alpha to the McFadden scores written earlier
construct_scores <- read.csv("~/Downloads/Replication Files/construct_scores.csv")
construct_scores$type <- as.character(construct_scores$type)
construct_scores <- merge(construct_scores,k_alpha,by="type")

# Only has an effect if the file read above already contained a k_alpha
# column, in which case merge() would have produced k_alpha.x / k_alpha.y
construct_scores$k_alpha.y <- NULL
write.csv(construct_scores, file= "construct_scores.csv", row.names = FALSE)

# The number of digits is a property of the xtable object, so it is set in
# xtable() rather than passed to print()
table_2 <- xtable(construct_scores, digits = 2)
print(table_2, booktabs = TRUE, include.rownames = FALSE)

rm(list = ls())

####################################################################### Appendix E ###############################################################

# The file content_validity contains the following variables:
  # word refers to each and every word in the UK Manifestos for 2009 (including 3 - BNP, GREENS, UKIP - that were not part of the analysis above)
  # wordscore refers to the wordscores (these scores were generated earlier with STATA and its implementation of Wordscores)
  # eu_count refers to the number of times a word was counted for each party
  # eu_mean is the fraction of times the word was deemed correct (that is, present in a sentence relating to the EU) - this was done manually
  # party refers to the party

# Read string columns as factors explicitly: the reordering below needs
# levels(party), which is NULL for a character column (the read.csv default
# from R 4.0 onwards) and would turn every party into NA.
content_validity <- read.csv("~/Downloads/Replication Files/content_validity.csv", stringsAsFactors = TRUE)
content_validity$party <- factor(content_validity$party,levels(content_validity$party)[c(1:7,9,8)])  #Order the factors (swap the last two levels)

# Density of the mean word relevance, one facet per party (three columns,
# each facet with its own axis ranges)
ggplot(content_validity, aes(x = eu_mean, group = party)) +
  geom_density(kernel = "epanechnikov") +
  facet_wrap(~party, ncol = 3, scales = "free") +
  labs(x = "Word Relevance (Mean)", y = "Density") +
  theme_classic()

# Wordscore of each word against its mean relevance
ggplot(content_validity, aes(x = eu_mean, y = wordscore)) +
  geom_point() +
  labs(x = "Word Relevance (Mean)", y = "Wordscore") +
  scale_y_continuous(limits = c(3, 17), breaks = c(3, 5, 7, 9, 11, 13, 15, 17)) +
  scale_x_continuous(
    limits = c(0, 1),
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1")
  ) +
  theme_classic()

set.seed(42)
# Figure might be slightly different due to randomness

# Keep only the UKIP words
content_validity <- content_validity[ which(content_validity$party=='UKIP'), ]
# Generate a random variable to separate the words on the y-axis. One draw per
# row, so the assignment also works if the number of UKIP words is not 165.
content_validity$y <- runif(nrow(content_validity), min=0, max=1)

# Each word is placed at its wordscore; text size follows eu_count and the
# shade of gray follows eu_mean
ggplot(content_validity, aes(wordscore, y)) +
  geom_text(aes(label=word, size = eu_count, colour = factor(eu_mean)))+ # Factor denotes the usefulness of the word
  scale_size(range=c(4,10))+ # Increase the size of the words for better readability
  scale_colour_grey(start=0.8, end=0.2)+
  expand_limits(x=c(0,16))+
  scale_x_continuous(breaks=seq(0,16,2))+
  xlab("Wordscore")+
  theme_classic()+
  theme(legend.position="none", axis.text.y=element_blank(), axis.title.y=element_blank(), axis.ticks.y=element_blank(), axis.line.y=element_blank())



####################################################################### Appendix F ######################################################################

library(readr)
library(quanteda)
library(readtext)
library(pdftools)
library(rlist)
library(stopwords)
library(scales)
library(epiR)
library(ggplot2)
library(ggpubr)

## Social Parse

setwd("~/Downloads/Replication Files/")

# Read the *_SOC.txt files and turn them into a document-feature matrix
social_dfm <- dfm(corpus(readtext("Parsing/*_SOC.txt")))
is.dfm(social_dfm)

# Cleaning: lower-case the features, then drop every feature that matches one
# of the character classes below, and finally the English stop words
social <- dfm_tolower(social_dfm)
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  social <- dfm_select(social, char_class, selection = "remove",
                       valuetype = "regex", verbose = TRUE)
}
social <- dfm_select(social, stopwords(language = "en", source = "stopwords-iso"),
                     selection = "remove", valuetype = "fixed", verbose = TRUE)

# Run Wordscores

# Show the document order; the reference scores below are positional
social@Dimnames$docs

# Reference scores, one entry per document; NA marks documents without a
# reference score
scores_social_so_ches <- c(8.1700001,NA,4.8299999,NA,2.5,NA,4.1900001,NA,4.5,NA)
ws_social_so_ches <- textmodel_wordscores(social, scores_social_so_ches)

# Predictions with confidence intervals under both rescalings; the first
# three columns of each result are renamed to value / lower / upper
social_so_ches_lbg <- as.data.frame(predict(ws_social_so_ches, interval = "confidence", rescaling = "lbg"))
names(social_so_ches_lbg)[1:3] <- paste0("ches_so_lbg_", c("value", "lower", "upper"))

social_so_ches_mv <- as.data.frame(predict(ws_social_so_ches, interval = "confidence", rescaling = "mv"))
names(social_so_ches_mv)[1:3] <- paste0("ches_so_mv_", c("value", "lower", "upper"))

# Combine both sets of columns and write them out
social_wordscores <- as.data.frame(as.matrix(cbind(social_so_ches_lbg, social_so_ches_mv)))
setwd("~/Downloads/Replication Files/")
write.csv(social_wordscores, file = "wordscores_social.csv")
rm(list = ls())

## Economic Parse

setwd("~/Downloads/Replication Files/")

# Read the *_ECO.txt files and turn them into a document-feature matrix
economic_dfm <- dfm(corpus(readtext("Parsing/*_ECO.txt")))
is.dfm(economic_dfm)

# Cleaning: lower-case the features, then drop every feature that matches one
# of the character classes below, and finally the English stop words
economic <- dfm_tolower(economic_dfm)
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]",
                     "[[:space:]]", "[[:blank:]]")) {
  economic <- dfm_select(economic, char_class, selection = "remove",
                         valuetype = "regex", verbose = TRUE)
}
economic <- dfm_select(economic, stopwords(language = "en", source = "stopwords-iso"),
                       selection = "remove", valuetype = "fixed", verbose = TRUE)

# Run Wordscores

# Show the document order; the reference scores below are positional
economic@Dimnames$docs

# Reference scores, one entry per document; NA marks documents without a
# reference score
scores_economic_ec_ches <- c(8.0600004,NA,5.2800002,NA,4.2199998,NA,3.0699999,NA,2.8299999,NA)
ws_economic_ec_ches <- textmodel_wordscores(economic, scores_economic_ec_ches)

# Predictions with confidence intervals under both rescalings; the first
# three columns of each result are renamed to value / lower / upper
economic_ec_ches_lbg <- as.data.frame(predict(ws_economic_ec_ches, interval = "confidence", rescaling = "lbg"))
names(economic_ec_ches_lbg)[1:3] <- paste0("ches_ec_lbg_", c("value", "lower", "upper"))

economic_ec_ches_mv <- as.data.frame(predict(ws_economic_ec_ches, interval = "confidence", rescaling = "mv"))
names(economic_ec_ches_mv)[1:3] <- paste0("ches_ec_mv_", c("value", "lower", "upper"))

# Combine both sets of columns and write them out
economic_wordscores <- as.data.frame(as.matrix(cbind(economic_ec_ches_lbg, economic_ec_ches_mv)))
setwd("~/Downloads/Replication Files/")
write.csv(economic_wordscores, file = "wordscores_economic.csv")
rm(list = ls())


## Regular

# Wordscores on the "Great Britain/*.txt" manifesto texts, for both the
# economic and the social dimension. Per the original note, this is run here
# because the earlier version did not include the confidence intervals.

setwd("~/Downloads/Replication Files/Manifestos/")

# Read the texts, wrap them in a corpus and build the document-feature matrix.
nonparse <- dfm(corpus(readtext("Great Britain/*.txt")))
is.dfm(nonparse)

# Cleaning

# Lower-case all features, then drop every feature matching one of the
# character classes below (one pass per class, in this order), and finally
# drop the English stopwords.
nonparse <- dfm_tolower(nonparse)
for (char_class in c("[[:punct:]]", "[[:digit:]]", "[[:cntrl:]]", "[[:space:]]", "[[:blank:]]")) {
  nonparse <- dfm_select(
    nonparse, char_class,
    selection = "remove", valuetype = "regex", verbose = TRUE
  )
}
nonparse <- dfm_select(
  nonparse, stopwords(language = "en", source = "stopwords-iso"),
  selection = "remove", valuetype = "fixed", verbose = TRUE
)

# Run Wordscores

# Great Britain

# Print the document names: the score vectors below are positional, so their
# entries must line up with this order.
nonparse@Dimnames$docs

# One entry per document; NA marks a document without a reference score.
scores_nonparse_ec_ches <- c(
  8.0600004, NA,
  5.2800002, NA,
  4.2199998, NA,
  3.0699999, NA,
  2.8299999, NA
)
scores_nonparse_so_ches <- c(
  8.1700001, NA,
  4.8299999, NA,
  2.5, NA,
  4.1900001, NA,
  4.5, NA
)

# One model per dimension, fitted on the same document-feature matrix.
ws_nonparse_ec_ches <- textmodel_wordscores(nonparse, scores_nonparse_ec_ches)
ws_nonparse_so_ches <- textmodel_wordscores(nonparse, scores_nonparse_so_ches)

# Predictions with confidence intervals: LBG rescaling first, then MV,
# each for the economic and then the social model.
nonparse_ec_ches_lbg <- as.data.frame(
  predict(ws_nonparse_ec_ches, interval = "confidence", rescaling = "lbg")
)
nonparse_so_ches_lbg <- as.data.frame(
  predict(ws_nonparse_so_ches, interval = "confidence", rescaling = "lbg")
)
nonparse_ec_ches_mv <- as.data.frame(
  predict(ws_nonparse_ec_ches, interval = "confidence", rescaling = "mv")
)
nonparse_so_ches_mv <- as.data.frame(
  predict(ws_nonparse_so_ches, interval = "confidence", rescaling = "mv")
)

# Rename the first three columns of each table (estimate, lower, upper).
ci_suffixes <- c("value", "lower", "upper")
names(nonparse_ec_ches_lbg)[1:3] <- paste0("nonparse_ches_ec_lbg_", ci_suffixes)
names(nonparse_ec_ches_mv)[1:3] <- paste0("nonparse_ches_ec_mv_", ci_suffixes)
names(nonparse_so_ches_lbg)[1:3] <- paste0("nonparse_ches_so_lbg_", ci_suffixes)
names(nonparse_so_ches_mv)[1:3] <- paste0("nonparse_ches_so_mv_", ci_suffixes)

# Bind all four tables column-wise (via a matrix round trip, as in the
# original script) and save them in the main replication folder.
nonparse_wordscores <- as.data.frame(
  as.matrix(
    cbind(nonparse_ec_ches_lbg, nonparse_ec_ches_mv, nonparse_so_ches_lbg, nonparse_so_ches_mv)
  )
)
setwd("~/Downloads/Replication Files/")
write.csv(nonparse_wordscores, file= "wordscores_nonparse.csv")

# Clear the whole workspace before the next section starts.
rm(list = ls())

## Parse Graphs

# The file benchmarks_parse contains the raw scores from the CHES 2010. The
# scores used above were standardized earlier (not in this script).
# Note that the value for the economic dimension has been reversed, to keep it
# consistent with the direction of the scale as used in the wordscores.

# Keep only the even-numbered rows of each wordscores file, i.e. the positions
# that hold NA in the reference-score vectors used when fitting the models.
scored_rows <- seq(2, 10, by = 2)

wordscores_economic <- read.csv("~/Downloads/Replication Files/wordscores_economic.csv", row.names=1)[scored_rows, ]
wordscores_social <- read.csv("~/Downloads/Replication Files/wordscores_social.csv", row.names=1)[scored_rows, ]
wordscores_nonparse <- read.csv("~/Downloads/Replication Files/wordscores_nonparse.csv", row.names=1)[scored_rows, ]

benchmarks <- read.csv("~/Downloads/Replication Files/benchmarks_parse.csv")
benchmarks$party <- as.character(benchmarks$party)

# Column-bind benchmarks and estimates. Rows are matched purely by position,
# so the benchmark file has to list the parties in the same order as the
# selected wordscores rows.
parse <- cbind(benchmarks, wordscores_economic, wordscores_social, wordscores_nonparse)
rownames(parse) <- NULL
parse$party <- as.factor(parse$party)

# Only `parse` is needed from here on.
rm(benchmarks, wordscores_economic, wordscores_social, wordscores_nonparse, scored_rows)

# Markers used in the four plots below:
#   default point    -> benchmark (ches_*_2010)
#   Square (shape 0) -> Parse
#   Triangle (shape 2) -> Non Parse
# Horizontal error bars show the lower/upper bounds of each estimate.

# Layers shared by all four plots: x axis fixed to 2-8 with unit breaks,
# classic theme, and no y-axis title.
# NOTE: because the limits are set on the scale, ggplot2 drops values that
# fall outside 2-8 (with a warning) instead of merely zooming in.
parse_plot_layout <- list(
  scale_x_continuous(limits = c(2, 8), breaks = seq(2, 8, by = 1)),
  ylab("Party"),
  theme_classic(),
  theme(axis.title.y = element_blank())
)

# Social LBG

ggplot(parse, aes(x = ches_so_2010, y = party)) +
  geom_point() +
  geom_point(aes(x = ches_so_lbg_value), shape = 0, size = 3) +
  geom_point(aes(x = nonparse_ches_so_lbg_value), shape = 2, size = 3) +
  geom_errorbarh(
    aes(xmin = ches_so_lbg_lower, xmax = ches_so_lbg_upper),
    height = .1, size = 0.2
  ) +
  geom_errorbarh(
    aes(xmin = nonparse_ches_so_lbg_lower, xmax = nonparse_ches_so_lbg_upper),
    height = .1, size = 0.2
  ) +
  xlab("Social LBG") +
  parse_plot_layout

# Social MV

ggplot(parse, aes(x = ches_so_2010, y = party)) +
  geom_point() +
  geom_point(aes(x = ches_so_mv_value), shape = 0, size = 3) +
  geom_point(aes(x = nonparse_ches_so_mv_value), shape = 2, size = 3) +
  geom_errorbarh(
    aes(xmin = ches_so_mv_lower, xmax = ches_so_mv_upper),
    height = .1, size = 0.2
  ) +
  geom_errorbarh(
    aes(xmin = nonparse_ches_so_mv_lower, xmax = nonparse_ches_so_mv_upper),
    height = .1, size = 0.2
  ) +
  xlab("Social MV") +
  parse_plot_layout

# Economic LBG

ggplot(parse, aes(x = ches_ec_2010, y = party)) +
  geom_point() +
  geom_point(aes(x = ches_ec_lbg_value), shape = 0, size = 3) +
  geom_point(aes(x = nonparse_ches_ec_lbg_value), shape = 2, size = 3) +
  geom_errorbarh(
    aes(xmin = ches_ec_lbg_lower, xmax = ches_ec_lbg_upper),
    height = .1, size = 0.2
  ) +
  geom_errorbarh(
    aes(xmin = nonparse_ches_ec_lbg_lower, xmax = nonparse_ches_ec_lbg_upper),
    height = .1, size = 0.2
  ) +
  xlab("Economic LBG") +
  parse_plot_layout

# Economic MV

ggplot(parse, aes(x = ches_ec_2010, y = party)) +
  geom_point() +
  geom_point(aes(x = ches_ec_mv_value), shape = 0, size = 3) +
  geom_point(aes(x = nonparse_ches_ec_mv_value), shape = 2, size = 3) +
  geom_errorbarh(
    aes(xmin = ches_ec_mv_lower, xmax = ches_ec_mv_upper),
    height = .1, size = 0.2
  ) +
  geom_errorbarh(
    aes(xmin = nonparse_ches_ec_mv_lower, xmax = nonparse_ches_ec_mv_upper),
    height = .1, size = 0.2
  ) +
  xlab("Economic MV") +
  parse_plot_layout

# Drop the temporary layer list so the workspace only holds `parse`.
rm(parse_plot_layout)
